@@ -17,9 +17,9 @@ using namespace test;
 namespace {
-template<class T>
+template <class T>
 std::vector<int32_t> create_offsets(const TensorShapeArray& shapes,
-        size_t alignment) {
+                                    size_t alignment) {
     size_t dtype_size = sizeof(T);
     if (alignment < dtype_size)
         alignment = dtype_size;
@@ -41,14 +41,15 @@ std::vector<int32_t> create_offsets(const TensorShapeArray& shapes,
     return offsets;
 }
 
-template<class T>
-std::vector<T> create_pack(size_t pack_size, const std::vector<int32_t>& offsets,
-        const std::vector<std::vector<T>>& ptr) {
-    assert(pack_size == offsets.back());
+template <class T>
+std::vector<T> create_pack(size_t pack_size,
+                           const std::vector<int32_t>& offsets,
+                           const std::vector<std::vector<T>>& ptr) {
+    megdnn_assert(pack_size == static_cast<size_t>(offsets.back()));
     std::vector<T> data(pack_size, 0);
     for (size_t i = 0; i * 2 < offsets.size(); ++i) {
-        size_t begin = offsets[i * 2], end = offsets[i * 2 +1];
-        for (size_t j = 0;j < end - begin; j++)
+        size_t begin = offsets[i * 2], end = offsets[i * 2 + 1];
+        for (size_t j = 0; j < end - begin; j++)
             data[begin + j] = ptr[i][j];
     }
     return data;
@@ -79,25 +80,23 @@ T* create_device_data(Handle* handle, const T* data, size_t size) {
 template <class T>
 void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
-        DType type) {
+                            DType type) {
     auto concat = handle->create_operator<ParamPackConcat>();
     size_t nr_params = shapes.size();
     std::vector<T*> param_ptrs;
-    std::vector<std::vector<T>> params = create_params<T>(nr_params,
-            shapes);
+    std::vector<std::vector<T>> params = create_params<T>(nr_params, shapes);
     for (size_t i = 0; i < nr_params; ++i) {
-        param_ptrs.push_back(create_device_data<T>(handle,
-                params[i].data(), shapes[i].total_nr_elems()));
+        param_ptrs.push_back(create_device_data<T>(handle, params[i].data(),
+                                                   shapes[i].total_nr_elems()));
     }
     std::vector<int32_t> offsets =
             create_offsets<T>(shapes, handle->alignment_requirement());
     size_t pack_size = offsets.back();
-    int32_t* offsets_gpu = create_device_data<int32_t>(handle, offsets.data(),
-            offsets.size());
-    std::vector<T> expected_pack =
-            create_pack<T>(pack_size, offsets, params);
+    int32_t* offsets_gpu =
+            create_device_data<int32_t>(handle, offsets.data(), offsets.size());
+    std::vector<T> expected_pack = create_pack<T>(pack_size, offsets, params);
     T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());
     TensorLayout dst_layout({pack_size}, type);
@@ -106,17 +105,18 @@ void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
     TensorLayout offsets_layout({offsets.size()}, dtype::Int32());
     TensorND offsets_tensor(offsets_gpu, offsets_layout);
-    test::WorkspaceWrapper workspace(handle, concat->get_workspace_in_bytes(
-            shapes, offsets_layout, {pack_size}));
+    test::WorkspaceWrapper workspace(
+            handle, concat->get_workspace_in_bytes(shapes, offsets_layout,
+                                                   {pack_size}));
     TensorND src_tensor(param_ptrs.data(),
-            TensorLayout({nr_params}, dtype::Int32()));
+                        TensorLayout({nr_params}, dtype::Int32()));
     concat->exec(src_tensor, offsets_tensor, dst_tensor, workspace.workspace());
     // check
     T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
-    test::megdnn_memcpy_D2H(handle, actual_pack,
-            pack_gpu, sizeof(T) * pack_size);
+    test::megdnn_memcpy_D2H(handle, actual_pack, pack_gpu,
+                            sizeof(T) * pack_size);
     for (size_t i = 0; i < pack_size; ++i) {
         ASSERT_EQ(actual_pack[i], expected_pack[i]);
     }