@@ -17,9 +17,9 @@ using namespace test;
 namespace {
-template<class T>
+template <class T>
 std::vector<int32_t> create_offsets(const TensorShapeArray& shapes,
-        size_t alignment) {
+                                    size_t alignment) {
     size_t dtype_size = sizeof(T);
     if (alignment < dtype_size)
         alignment = dtype_size;
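The body of `create_offsets` falls outside the hunk, so only its head and tail are visible here. Judging from how `create_pack` consumes the result below (one `{begin, end}` pair per tensor, with `offsets.back()` as the total packed size), the elided part presumably rounds each tensor's start up to the alignment boundary, counted in elements. A minimal standalone sketch of that scheme (the name `create_offsets_sketch` and the simplified signature are mine, not the patch's):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical sketch, not the MegDNN source: emit one {begin, end} pair per
// tensor, rounding each begin up to a multiple of `alignment_elems`.
std::vector<int32_t> create_offsets_sketch(const std::vector<size_t>& nr_elems,
                                           size_t alignment_elems) {
    std::vector<int32_t> offsets(nr_elems.size() * 2);
    size_t end = 0;
    for (size_t i = 0; i < nr_elems.size(); ++i) {
        // round the running end up to the next alignment boundary
        size_t begin =
                (end + alignment_elems - 1) / alignment_elems * alignment_elems;
        end = begin + nr_elems[i];
        offsets[i * 2] = static_cast<int32_t>(begin);
        offsets[i * 2 + 1] = static_cast<int32_t>(end);
    }
    return offsets;  // offsets.back() is the total packed size in elements
}
```

Under this reading, the `alignment = dtype_size` clamp above makes sense: the alignment is given in bytes and must cover at least one element before it can be converted to an element count.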
@@ -41,14 +41,15 @@ std::vector<int32_t> create_offsets(const TensorShapeArray& shapes,
     return offsets;
 }
-template<class T>
-std::vector<T> create_pack(size_t pack_size, const std::vector<int32_t>& offsets,
-                           const std::vector<std::vector<T>>& ptr) {
-    assert(pack_size == offsets.back());
+template <class T>
+std::vector<T> create_pack(size_t pack_size,
+                           const std::vector<int32_t>& offsets,
+                           const std::vector<std::vector<T>>& ptr) {
+    megdnn_assert(pack_size == static_cast<size_t>(offsets.back()));
     std::vector<T> data(pack_size, 0);
     for (size_t i = 0; i * 2 < offsets.size(); ++i) {
-        size_t begin = offsets[i * 2], end = offsets[i * 2 +1];
-        for (size_t j = 0;j < end - begin; j++)
+        size_t begin = offsets[i * 2], end = offsets[i * 2 + 1];
+        for (size_t j = 0; j < end - begin; j++)
             data[begin + j] = ptr[i][j];
     }
     return data;
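`create_pack` builds the host-side reference buffer: each parameter is copied into its `[begin, end)` slot, and any alignment gaps between slots stay zero-initialized. A small self-contained illustration of that packing (shapes, values, and offsets invented for the example):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    // Two params of 3 and 2 elements; with a 4-element alignment the offsets
    // would come out as {0, 3, 4, 6}, leaving index 3 as a zero-filled gap.
    std::vector<std::vector<float>> params = {{1.f, 2.f, 3.f}, {4.f, 5.f}};
    std::vector<int> offsets = {0, 3, 4, 6};
    std::vector<float> pack(offsets.back(), 0.f);
    for (std::size_t i = 0; i * 2 < offsets.size(); ++i)
        for (int j = offsets[i * 2]; j < offsets[i * 2 + 1]; ++j)
            pack[j] = params[i][j - offsets[i * 2]];
    // pack == {1, 2, 3, 0, 4, 5}
    assert(pack[3] == 0.f && pack[5] == 5.f);
}
```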
@@ -79,25 +80,23 @@ T* create_device_data(Handle* handle, const T* data, size_t size) {
 template <class T>
 void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
-        DType type) {
+                            DType type) {
     auto concat = handle->create_operator<ParamPackConcat>();
     size_t nr_params = shapes.size();
     std::vector<T*> param_ptrs;
-    std::vector<std::vector<T>> params = create_params<T>(nr_params,
-            shapes);
+    std::vector<std::vector<T>> params = create_params<T>(nr_params, shapes);
     for (size_t i = 0; i < nr_params; ++i) {
-        param_ptrs.push_back(create_device_data<T>(handle,
-                params[i].data(), shapes[i].total_nr_elems()));
+        param_ptrs.push_back(create_device_data<T>(handle, params[i].data(),
+                                                   shapes[i].total_nr_elems()));
     }
     std::vector<int32_t> offsets =
             create_offsets<T>(shapes, handle->alignment_requirement());
     size_t pack_size = offsets.back();
-    int32_t* offsets_gpu = create_device_data<int32_t>(handle, offsets.data(),
-            offsets.size());
-    std::vector<T> expected_pack =
-            create_pack<T>(pack_size, offsets, params);
+    int32_t* offsets_gpu =
+            create_device_data<int32_t>(handle, offsets.data(), offsets.size());
+    std::vector<T> expected_pack = create_pack<T>(pack_size, offsets, params);
     T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());
     TensorLayout dst_layout({pack_size}, type);
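`create_device_data` is defined just above this hunk (its signature appears in the hunk header), but its body is not shown. A hedged reconstruction of what such a helper plausibly does, assuming the `test::megdnn_malloc` / `test::megdnn_memcpy_H2D` utilities that accompany the `test::megdnn_memcpy_D2H` call used below:

```cpp
// Hypothetical reconstruction, not the actual body: allocate `size` elements
// on the device and, when a host buffer is given, copy it over.
template <class T>
T* create_device_data(Handle* handle, const T* data, size_t size) {
    T* data_device =
            static_cast<T*>(test::megdnn_malloc(handle, size * sizeof(T)));
    if (data)
        test::megdnn_memcpy_H2D(handle, data_device, data, size * sizeof(T));
    return data_device;
}
```

The `nullptr` call site above (`pack_gpu`) would rely on exactly that guard: it only needs an uninitialized destination buffer on the device.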
@@ -106,17 +105,18 @@ void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
     TensorLayout offsets_layout({offsets.size()}, dtype::Int32());
     TensorND offsets_tensor(offsets_gpu, offsets_layout);
-    test::WorkspaceWrapper workspace(handle, concat->get_workspace_in_bytes(
-            shapes, offsets_layout, {pack_size}));
+    test::WorkspaceWrapper workspace(
+            handle, concat->get_workspace_in_bytes(shapes, offsets_layout,
+                                                   {pack_size}));
     TensorND src_tensor(param_ptrs.data(),
-            TensorLayout({nr_params}, dtype::Int32()));
+                        TensorLayout({nr_params}, dtype::Int32()));
     concat->exec(src_tensor, offsets_tensor, dst_tensor, workspace.workspace());
     // check
     T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
-    test::megdnn_memcpy_D2H(handle, actual_pack,
-            pack_gpu, sizeof(T) * pack_size);
+    test::megdnn_memcpy_D2H(handle, actual_pack, pack_gpu,
+                            sizeof(T) * pack_size);
     for (size_t i = 0; i < pack_size; ++i) {
         ASSERT_EQ(actual_pack[i], expected_pack[i]);
     }
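For context, helpers like `test_param_pack_concat` are normally driven from a per-backend gtest case; a hypothetical invocation (fixture, test name, and shapes are illustrative, not part of this patch) might read:

```cpp
// Illustrative driver, not from this diff: odd sizes exercise the
// alignment-padding paths for more than one dtype.
TEST_F(CUDA, PARAM_PACK_CONCAT) {
    TensorShapeArray shapes = {{129}, {21}, {15}};
    test_param_pack_concat<int32_t>(handle_cuda(), shapes, dtype::Int32());
    test_param_pack_concat<float>(handle_cuda(), shapes, dtype::Float32());
}
```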