GitOrigin-RevId: 47373d291d
tags/v0.4.0
| @@ -38,9 +38,12 @@ def set_default_device(device: str = "xpux"): | |||
| :param device: default device type. The type can be 'cpu0', 'cpu1', etc., | |||
| or 'gpu0', 'gpu1', etc., to specify the particular cpu or gpu to use. | |||
| To specify multiple devices, use cpu0:1 or gpu0:2. | |||
| 'cpux' and 'gupx' can also be used to specify any number of cpu or gpu devices. | |||
| 'multithread' device type is avaliable when inference, which implements | |||
| multi-threading parallelism at the operator level. For example, | |||
| 'multithread4' will compute with 4 threads. which implements | |||
| The default value is 'xpux' to specify any device available. | |||
| It can also be set by environmental variable `MGE_DEFAULT_DEVICE`. | |||
| @@ -603,11 +603,11 @@ Args Args::from_argv(int argc, char **argv) { | |||
| ++ i; | |||
| ret.multithread_number = std::stoi(argv[i]); | |||
| ret.load_config.comp_node_mapper = | |||
| [nr_thread = | |||
| [nr_threads = | |||
| ret.multithread_number](CompNode::Locator& loc) { | |||
| loc.type = CompNode::DeviceType::MULTITHREAD; | |||
| loc.device = 0; | |||
| loc.stream = nr_thread; | |||
| loc.nr_threads = nr_threads; | |||
| }; | |||
| continue; | |||
| } | |||
| @@ -615,11 +615,12 @@ Args Args::from_argv(int argc, char **argv) { | |||
| mgb_log_warn("use multithread:default mode"); | |||
| ++i; | |||
| ret.multithread_number = std::stoi(argv[i]); | |||
| ret.load_config.comp_node_mapper = [nr_thread = | |||
| ret.multithread_number](CompNode::Locator& loc) { | |||
| ret.load_config.comp_node_mapper = [nr_threads = | |||
| ret.multithread_number]( | |||
| CompNode::Locator& loc) { | |||
| loc.type = CompNode::DeviceType::MULTITHREAD; | |||
| loc.device = CompNode::Locator::DEVICE_MULTITHREAD_DEFAULT; | |||
| loc.stream = nr_thread; | |||
| loc.nr_threads = nr_threads; | |||
| }; | |||
| continue; | |||
| } | |||
| @@ -127,13 +127,19 @@ CompNode::Locator CompNode::Locator::parse(const std::string &id) { | |||
| // current parsing location | |||
| const char *ptr = id.data(); | |||
| if (id == "cpu:default") { | |||
| return {DeviceType::CPU, DEVICE_CPU_DEFAULT, 0}; | |||
| return {DeviceType::CPU, DEVICE_CPU_DEFAULT, {0}}; | |||
| } | |||
| if (!strncmp(ptr, "multithread:default", 19)) { | |||
| //! the multithread default compnode string like "multithread:default:x" | |||
| ptr += 20; | |||
| int nr_thread =std::stoi(ptr); | |||
| return {DeviceType::MULTITHREAD, DEVICE_MULTITHREAD_DEFAULT, nr_thread}; | |||
| if (id.size() > 20) { | |||
| ptr += 20; | |||
| int nr_thread = std::stoi(ptr); | |||
| return {DeviceType::MULTITHREAD, | |||
| DEVICE_MULTITHREAD_DEFAULT, | |||
| {nr_thread}}; | |||
| } else { | |||
| err(); | |||
| } | |||
| } | |||
| DeviceType dev_type; | |||
| @@ -192,8 +198,16 @@ CompNode::Locator CompNode::Locator::parse(const std::string &id) { | |||
| int num_stream = parse_int(); | |||
| if (*ptr) | |||
| err(); | |||
| //! multi thread with thread number(num_stream) being zero is illegal | |||
| if (dev_type == DeviceType::MULTITHREAD) { | |||
| if (num_dev == 0) { | |||
| err(); | |||
| } | |||
| //! num_steam store the nr_thread | |||
| std::swap(num_dev, num_stream); | |||
| } | |||
| return {dev_type, num_dev, num_stream}; | |||
| return {dev_type, num_dev, {num_stream}}; | |||
| } | |||
| void CompNode::Locator::set_device_map(DeviceType type, int from, int to) { | |||
| @@ -242,16 +256,22 @@ CompNode::Locator CompNode::Locator::to_physical() const { | |||
| stream_physical = 1023; | |||
| } | |||
| } | |||
| return {type_physical, device_physical, stream_physical}; | |||
| return {type_physical, device_physical, {stream_physical}}; | |||
| } | |||
| std::string CompNode::Locator::to_string() const { | |||
| if (device == DEVICE_CPU_DEFAULT) { | |||
| return "cpu:default"; | |||
| } else if (device == DEVICE_MULTITHREAD_DEFAULT) { | |||
| std::string ret="multithread:default:"; | |||
| std::string ret = "multithread:default:"; | |||
| ret.append(get_stream_str(stream)); | |||
| return ret; | |||
| } else if (type == DeviceType::MULTITHREAD) { | |||
| std::string ret("multithread"); | |||
| ret.append(get_stream_str(stream)) | |||
| .append(":") | |||
| .append(get_stream_str(device)); | |||
| return ret; | |||
| } | |||
| char numstr[32]; | |||
| if (device == -1) { | |||
| @@ -380,9 +380,9 @@ class CpuCompNode::CompNodeImpl final: public CpuDispatchableBase { | |||
| m_locator_logical(locator_logical) { | |||
| auto cn = make_comp_node_from_impl(this); | |||
| if (locator.type == DeviceType::MULTITHREAD) { | |||
| //! When multi-thread the stream stand for thread number | |||
| m_thread_pool = std::unique_ptr<ThreadPool>( | |||
| new ThreadPool(static_cast<size_t>(locator.stream))); | |||
| m_thread_pool = std::unique_ptr<ThreadPool>(new ThreadPool( | |||
| static_cast<size_t>(locator.nr_threads))); | |||
| mgb_assert(m_thread_pool, "ThradPool create failed"); | |||
| } | |||
| if (locator.type == DeviceType::CPU) { | |||
| @@ -398,7 +398,6 @@ class CpuCompNode::CompNodeImpl final: public CpuDispatchableBase { | |||
| cn); | |||
| } | |||
| } else if (locator.type == DeviceType::MULTITHREAD) { | |||
| mgb_assert(m_thread_pool, "ThradPool create failed"); | |||
| if (locator.device == Locator::DEVICE_MULTITHREAD_DEFAULT) { | |||
| m_env.init_cpu( | |||
| {std::make_shared<InplaceCPUDispatcher>( | |||
| @@ -745,15 +744,14 @@ CpuCompNode::Impl* CpuCompNode::load_cpu(Locator locator, | |||
| } else { | |||
| mgb_assert(locator.type == DeviceType::MULTITHREAD); | |||
| auto&& pqueue_weak = sm_pool->physical2queue_multithead[{ | |||
| locator.device, locator.stream}]; | |||
| locator.device, locator.nr_threads}]; | |||
| auto pqueue = pqueue_weak.lock(); | |||
| if (!pqueue) { | |||
| pqueue = std::make_shared<WorkerQueue>(locator); | |||
| pqueue_weak = pqueue; | |||
| } | |||
| auto&& pimpl = sm_pool->logical2impl_multi_thread[{ | |||
| static_cast<int>(compact_logical_device), | |||
| locator_logical.stream}]; | |||
| compact_logical_device, locator_logical.nr_threads}]; | |||
| if (!pimpl) { | |||
| mgb_assert(sm_pool->nr_used_impl_storage < Pool::MAX_NR_COMP_NODE, | |||
| "too many cpu multithread comp nodes; max %d allowed", | |||
| @@ -153,8 +153,12 @@ class CompNode { | |||
| int device = -1; | |||
| //! multiple streams can execute on one computing device and share | |||
| //! memory | |||
| int stream = 0; | |||
| //! memory, when compnode type is multithread the field also stand | |||
| //! for nr_threads | |||
| union { | |||
| int stream = 0; | |||
| int nr_threads; | |||
| }; | |||
| /*! | |||
| * \brief parse a string identifier | |||
| @@ -162,7 +166,7 @@ class CompNode { | |||
| * currently supported ID format: (gpu|cpu)<n>[:m] where n is the | |||
| * device number, possibly with m as the stream id. | |||
| */ | |||
| static Locator parse(const std::string &id); | |||
| static Locator parse(const std::string& id); | |||
| /*! | |||
| * \brief set mapping between device numbers of a device type | |||
| @@ -28,9 +28,7 @@ using namespace mgb; | |||
| TEST(TestCompNode, Parse) { | |||
| using L = CompNode::Locator; | |||
| using D = CompNode::DeviceType; | |||
| auto make_lc = [](D t, int dev, int s) -> L { | |||
| return {t, dev, s}; | |||
| }; | |||
| auto make_lc = [](D t, int dev, int s) -> L { return {t, dev, {s}}; }; | |||
| ASSERT_EQ(L::parse("xpux"), make_lc(D::UNSPEC, -1, 0)); | |||
| ASSERT_EQ(L::parse("xpux:23"), make_lc(D::UNSPEC, -1, 23)); | |||
| @@ -47,10 +45,9 @@ TEST(TestCompNode, Parse) { | |||
| ASSERT_EQ(L::parse("xpu23"), make_lc(D::UNSPEC, 23, 0)); | |||
| ASSERT_EQ(L::parse("xpu23:1"), make_lc(D::UNSPEC, 23, 1)); | |||
| ASSERT_EQ(L::parse("cpu:default"), | |||
| make_lc(D::CPU, L::DEVICE_CPU_DEFAULT, 0)); | |||
| ASSERT_EQ(L::parse("multithread0:2"), make_lc(D::MULTITHREAD, 0, 2)); | |||
| ASSERT_EQ(L::parse("multithread1:3"), make_lc(D::MULTITHREAD, 1, 3)); | |||
| ASSERT_EQ(L::parse("cpu:default"), make_lc(D::CPU, L::DEVICE_CPU_DEFAULT, 0)); | |||
| ASSERT_EQ(L::parse("multithread2:0"), make_lc(D::MULTITHREAD, 0, 2)); | |||
| ASSERT_EQ(L::parse("multithread1:3"), make_lc(D::MULTITHREAD, 3, 1)); | |||
| ASSERT_EQ(L::parse("multithread:default:2"), | |||
| make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2)); | |||
| @@ -65,6 +62,10 @@ TEST(TestCompNode, Parse) { | |||
| ASSERT_THROW(L::parse("heaxgon0"), MegBrainError); | |||
| ASSERT_THROW(L::parse("rcom0"), MegBrainError); | |||
| ASSERT_THROW(L::parse("cmabricon0"), MegBrainError); | |||
| ASSERT_THROW(L::parse("multithread"), MegBrainError); | |||
| ASSERT_THROW(L::parse("multithread1:"), MegBrainError); | |||
| ASSERT_THROW(L::parse("multithread1:default"), MegBrainError); | |||
| ASSERT_THROW(L::parse("multithread1:default:0"), MegBrainError); | |||
| } | |||
| TEST(TestCompNode, SetDefaultDev) { | |||
| @@ -107,12 +108,12 @@ TEST(TestCompNode, Load) { | |||
| #endif | |||
| #if MGB_HAVE_THREAD | |||
| auto cn_multi_thread0 = CompNode::load("multithread0:2"); | |||
| auto cn_multi_thread1 = CompNode::load("multithread1:2"); | |||
| ASSERT_EQ(CompNode::load("multithread0:2"), cn_multi_thread0); | |||
| ASSERT_EQ(CompNode::load("multithread1:2"), cn_multi_thread1); | |||
| ASSERT_NE(CompNode::load("multithread0:4"), cn_multi_thread0); | |||
| ASSERT_NE(CompNode::load("multithread1:4"), cn_multi_thread1); | |||
| auto cn_multi_thread0 = CompNode::load("multithread2:0"); | |||
| auto cn_multi_thread1 = CompNode::load("multithread2:1"); | |||
| ASSERT_EQ(CompNode::load("multithread2:0"), cn_multi_thread0); | |||
| ASSERT_EQ(CompNode::load("multithread2:1"), cn_multi_thread1); | |||
| ASSERT_NE(CompNode::load("multithread4:0"), cn_multi_thread0); | |||
| ASSERT_NE(CompNode::load("multithread4:1"), cn_multi_thread1); | |||
| auto cn_multi_default0 = CompNode::load("multithread:default:2"); | |||
| auto cn_multi_default1 = CompNode::load("multithread:default:4"); | |||
| @@ -139,7 +140,7 @@ TEST(TestCompNode, FreeAfterFinalize) { | |||
| auto type = static_cast<CompNode::DeviceType>(i); | |||
| if (!CompNode::get_device_count(type)) | |||
| continue; | |||
| auto cn = CompNode::load(CompNode::Locator{type}); | |||
| auto cn = CompNode::load(CompNode::Locator{type, -1, {0}}); | |||
| auto ptr = cn.alloc_device(123); | |||
| CompNode::finalize(); | |||
| cn.free_device(ptr); | |||
| @@ -190,13 +191,13 @@ TEST(TestCompNodeCPU, CoreAffinity) { | |||
| size_t data0, data1 = 0; | |||
| auto empty_task = []() {}; | |||
| auto cn0 = CompNode::load("cpu:default"), cn1 = CompNode::load("cpu0"), | |||
| cn2 = CompNode::load("multithread0:2"); | |||
| cn2 = CompNode::load("multithread2:0"); | |||
| auto binding0 = [&](size_t) { data0 = 10; }; | |||
| CompNodeEnv::from_comp_node(cn0).cpu_env().set_affinity(binding0); | |||
| CompNodeEnv::from_comp_node(cn0).cpu_env().dispatch(empty_task); | |||
| cn0.sync(); | |||
| auto binding1 = [&](size_t) { data1 = 20; }; | |||
| auto binding1 = [&](size_t ) { data1 = 20; }; | |||
| CompNodeEnv::from_comp_node(cn1).cpu_env().set_affinity(binding1); | |||
| CompNodeEnv::from_comp_node(cn1).cpu_env().dispatch(empty_task); | |||
| cn1.sync(); | |||
| @@ -238,7 +239,7 @@ TEST(TestCompNode, CPU_MULTI_THREAD) { | |||
| }; | |||
| for (auto&& str : std::vector<std::string>{ | |||
| "multithread0:2", "multithread0:4", "multithread:default:4"}) { | |||
| "multithread2:0", "multithread4:0", "multithread:default:4"}) { | |||
| auto cn0 = CompNode::load("cpu0"), cn1 = CompNode::load(str); | |||
| std::thread wk_thread0{std::ref(worker), std::ref(dst0), std::ref(cn0)}; | |||
| std::thread wk_thread1{std::ref(worker), std::ref(dst1), std::ref(cn1)}; | |||
| @@ -271,9 +272,9 @@ TEST(TestCompNodeCPU, PhysicalDispatch) { | |||
| L::set_device_map(DT, ID, 0); | |||
| L::set_device_map(DT, ID + 1, 0); | |||
| L::set_device_map(DT, ID + 2, 1); | |||
| auto cn0 = CompNode::load({DT, ID, 0}), | |||
| cn1 = CompNode::load({DT, ID + 1, 0}), | |||
| cn2 = CompNode::load({DT, ID + 2, 0}); | |||
| auto cn0 = CompNode::load({DT, ID, {0}}), | |||
| cn1 = CompNode::load({DT, ID + 1, {0}}), | |||
| cn2 = CompNode::load({DT, ID + 2, {0}}); | |||
| #if MGB_HAVE_THREAD | |||
| ASSERT_NE(cn0, cn1); | |||
| #else | |||
| @@ -532,10 +533,10 @@ TEST(TestCompNode, MultipleLoad) { | |||
| for (size_t i = 1; i < CompNode::NR_DEVICE_TYPE; ++i) { | |||
| auto dt = static_cast<CompNode::DeviceType>(i); | |||
| if (CompNode::get_device_count(dt)) { | |||
| auto cn = CompNode::load({dt}); | |||
| auto cn = CompNode::load({dt, 0, {0}}); | |||
| mgb_log("comp node %s is available", cn.to_string().c_str()); | |||
| run(cn); | |||
| cn = CompNode::load({dt}); | |||
| cn = CompNode::load({dt, 0, {0}}); | |||
| run(cn); | |||
| } | |||
| } | |||
| @@ -591,7 +592,7 @@ TYPED_TEST(TestCPUCompSeqRec, run_default_cpu) { | |||
| comp_node_test::seq_rec::run<TypeParam>(CompNode::load("cpu:default")); | |||
| } | |||
| TYPED_TEST(TestCPUCompSeqRec, run_multi_thread) { | |||
| auto cn = CompNode::load("multithread0:4"); | |||
| auto cn = CompNode::load("multithread4:0"); | |||
| comp_node_test::seq_rec::run<TypeParam>(cn); | |||
| } | |||