/**
 * \file src/opr/test/muxing.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "megbrain/opr/muxing.h"
#include <algorithm>
#include <random>
#include "megbrain/opr/blas.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/numerical_diff.h"

using namespace mgb;

namespace {

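/**
 * Build an AllGather over multiple CUDA comp nodes, run it, and check the
 * forward output against a manually concatenated expected tensor.
 *
 * \param axis_size extent along the concat axis of each input, one entry per
 *      comp node
 * \param make_sleep insert a Sleep opr on each input so inputs become ready
 *      in an order unrelated to their submission order
 * \param check_gx also compare symbolic gradients against numerical
 *      differentiation (smaller tensors are used to keep this cheap)
 */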
void run_all_gather(
        const std::vector<size_t>& axis_size, bool& success, int axis,
        bool make_sleep = true, bool check_gx = false) {
    success = false;
    size_t SIZE0 = 34, SIZE1 = 47;
    if (check_gx) {
        // keep tensors small so numerical differentiation stays cheap
        SIZE0 = 3;
        SIZE1 = 4;
    }
    std::vector<double> sleep_time;
    size_t tot_axis_size = 0;
    for (size_t i = 0; i < axis_size.size(); ++i) {
        sleep_time.push_back(i * 0.05 + 0.1);
        tot_axis_size += axis_size[i];
    }
    // shuffle the sleep times so completion order is independent of input
    // order; std::shuffle is available since C++11, while std::random_shuffle
    // was removed in C++17
    std::default_random_engine rng_engine;
    std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine);

    auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA;
    size_t nr_dev = std::min<size_t>(CompNode::get_device_count(DEVICE_TYPE), 4);
    HostTensorGenerator<> gen;
    std::vector<std::shared_ptr<HostTensorND>> host_x, host_lossp;
    for (size_t i = 0; i < axis_size.size(); ++i) {
        // test both non-overlapping and overlapping comp nodes: for an odd
        // number of inputs, inputs beyond nr_dev wrap onto extra streams of
        // the same devices; for an even number, all inputs share stream 0
        int stream = axis_size.size() % 2 ? i / nr_dev : 0;
        auto cn = CompNode::load({DEVICE_TYPE, static_cast<int>(i % nr_dev), stream});
        host_x.push_back(gen({SIZE0, axis_size[i], SIZE1}, cn));
        host_lossp.push_back(gen({SIZE0, tot_axis_size, SIZE1}, cn));
    }

    auto graph = ComputingGraph::make();
    SymbolVarArray dev_x, dev_x_delay, dev_lossp;
    for (size_t i = 0; i < axis_size.size(); ++i) {
        dev_x.push_back(opr::Host2DeviceCopy::make(*graph, host_x[i]));
        dev_lossp.push_back(opr::Host2DeviceCopy::make(*graph, host_lossp[i]));
        auto delay = dev_x.back();
        if (make_sleep)
            delay = opr::Sleep::make(delay, sleep_time[i]);
        dev_x_delay.push_back(delay);
    }

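    // AllGather concatenates the inputs along axis and yields one output per
    // input comp node, each holding the full concatenated tensor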
    auto dev_y = opr::AllGather::make(dev_x_delay, axis);

    SymbolVarArray dev_gx;

    SymbolVar loss;
    if (check_gx) {
        ASSERT_EQ(axis_size.size(), dev_y.size());
        TensorShape shp = {SIZE0 * tot_axis_size * SIZE1};
        auto cn = CompNode::load("gpu0");

        // loss = sum of dot(y_i, lossp_i), accumulated on gpu0
        for (size_t i = 0; i < axis_size.size(); ++i) {
            auto cur_loss =
                    opr::Dot::make(dev_y[i].reshape(shp), dev_lossp[i].reshape(shp))
                            .rename(ssprintf("loss%zu", i));
            if (cn != cur_loss.node()->comp_node()) {
                cur_loss = opr::Copy::make(cur_loss, cn);
            }
            if (loss.node())
                loss = loss + cur_loss;
            else
                loss = cur_loss;
        }

        for (auto&& i : dev_x)
            dev_gx.push_back(cg::grad(loss, i));
    }

    ComputingGraph::OutputSpec outspec;
    std::vector<HostTensorND> host_y(dev_y.size()), host_gx(host_x.size());
    for (size_t i = 0; i < axis_size.size(); ++i) {
        outspec.push_back(make_callback_copy(dev_y[i], host_y[i]));
        if (check_gx)
            outspec.push_back(make_callback_copy(dev_gx[i], host_gx[i]));
    }

    auto func = graph->compile(outspec);
    func->execute();
    mgb_log("exec_time=%.3fms; axis_size=%zu", func->wait().get_prev_exec_time() * 1e3,
            axis_size.size());

    {
        // check y: every output should equal the inputs concatenated along
        // dim 1, assembled here on the host slice by slice
        HostTensorND expected{CompNode::load("gpu0"), dtype::Float32()};
        {
            expected.resize({SIZE0, tot_axis_size, SIZE1});
            size_t start = 0;
            for (auto&& i : host_x) {
                auto end = start + i->shape().shape[1];
                for (size_t slice = 0; slice < SIZE0; ++slice) {
                    memcpy(expected.ptr<float>({slice, start, 0}),
                           i->ptr<float>({slice}),
                           (end - start) * SIZE1 * sizeof(float));
                }
                start = end;
            }
        }

        for (auto&& i : host_y)
            MGB_ASSERT_TENSOR_EQ(expected, i);
    }

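    // check gx: compare the symbolic gradients captured in host_gx against
    // numerical gradients of the same scalar loss from numerical_diff_pt2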
    if (check_gx) {
        std::vector<HostTensorND*> inp;
        for (auto&& i : host_x)
            inp.push_back(i.get());

        HostTensorND host_loss;
        // recompile so only the scalar loss is computed when probing
        auto func = graph->compile({make_callback_copy(loss, host_loss)});

        auto cost = [&]() {
            func->execute();
            return host_loss.ptr<float>()[0];
        };
        auto diff = numerical_diff_pt2(
                inp, cost, std::vector<Maybe<float>>(inp.size(), 1.f));

        for (size_t i = 0; i < axis_size.size(); ++i)
            MGB_ASSERT_TENSOR_NEAR(diff.at(i), host_gx.at(i), 1e-4);
    }

    success = true;
}

} // anonymous namespace

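// The tests below first check gradients on small fixed shapes, then stress
// the forward pass with a growing number of randomly sized inputs, both with
// and without artificial sleeps.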
TEST(TestMuxing, AllGather) {
    REQUIRE_GPU(4);
    bool success;
    run_all_gather({2}, success, 1, false, true);
    ASSERT_TRUE(success) << "failed grad 1";
    run_all_gather({2, 3, 4, 5}, success, 1, false, true);
    ASSERT_TRUE(success) << "failed grad 4";

    std::mt19937 rng;
    std::vector<size_t> sizes;
    for (size_t i = 1; i <= 8; i++) {
        sizes.push_back(10 + rng() % 10);
        run_all_gather(sizes, success, 1);
        ASSERT_TRUE(success) << ssprintf("failed with %zu inputs", i);
    }
    run_all_gather(sizes, success, 1, false);
    ASSERT_TRUE(success) << "failed without sleep";
}

TEST(TestMuxing, AllGatherWithNegativeAxis) {
    REQUIRE_GPU(4);
    bool success;
    // axis -2 on these 3-dim tensors is equivalent to axis 1
    run_all_gather({2}, success, -2, false, true);
    ASSERT_TRUE(success) << "failed grad 1";
    run_all_gather({2, 3, 4, 5}, success, -2, false, true);
    ASSERT_TRUE(success) << "failed grad 4";

    std::mt19937 rng;
    std::vector<size_t> sizes;
    for (size_t i = 1; i <= 8; i++) {
        sizes.push_back(10 + rng() % 10);
        run_all_gather(sizes, success, -2);
        ASSERT_TRUE(success) << ssprintf("failed with %zu inputs", i);
    }
    run_all_gather(sizes, success, -2, false);
    ASSERT_TRUE(success) << "failed without sleep";
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}