@@ -26,7 +26,7 @@ namespace kernel {
 template <typename T, typename S>
 class UniqueGpuKernel : public GpuKernel {
  public:
-  UniqueGpuKernel() : input_size_(0), output_size_(0), workspace_size_(0), num_elements_(1), post_output_size_(0) {}
+  UniqueGpuKernel() { ResetResource(); }
   ~UniqueGpuKernel() override = default;

   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@@ -48,7 +48,7 @@ class UniqueGpuKernel : public GpuKernel {
   bool Init(const CNodePtr &kernel_node) override {
     kernel_node_ = kernel_node;
-    std::vector<size_t> shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    std::vector<size_t> shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
     for (auto x : shape) {
       num_elements_ *= x;
     }
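Under dynamic shape, the shape recorded at compile time can still contain placeholder dimensions, so `Init` now reads the input's real device shape (`AnfAlgo::GetInputRealDeviceShapeIfExist`) rather than the statically inferred one (`AnfAlgo::GetPrevNodeOutputInferShape`). A minimal Python sketch of the idea, with plain lists standing in for the two shape sources (`element_count` and its arguments are illustrative names, not MindSpore API):

```python
from functools import reduce

def element_count(infer_shape, device_shape=None):
    """Prefer the concrete device shape when it is known; the
    inferred shape may hold -1 placeholders under dynamic shape."""
    shape = device_shape if device_shape else infer_shape
    return reduce(lambda a, b: a * b, shape, 1)

print(element_count([-1]))       # -1: the inferred shape is unusable for sizing
print(element_count([-1], [9]))  # 9: the real device shape gives the true count
```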
@@ -77,6 +77,19 @@ class UniqueGpuKernel : public GpuKernel {
     AnfAlgo::SetOutputInferTypeAndShape(type_ids, shapes, kernel_node_.get());
   }

+  void ResetResource() noexcept override {
+    input_size_ = 0;
+    output_size_ = 0;
+    workspace_size_ = 0;
+    num_elements_ = 1;
+    post_output_size_ = 0;
+    stream_ptr_ = nullptr;
+    kernel_node_ = nullptr;
+    input_size_list_.clear();
+    output_size_list_.clear();
+    workspace_size_list_.clear();
+  }
+
  protected:
   void InitSizeLists() override {
     input_size_list_.push_back(input_size_);
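With dynamic shapes the same kernel instance is re-initialized for every new input shape, so all size bookkeeping must return to a known-clean state between launches; the constructor now delegates to `ResetResource()` so the initial state is defined in exactly one place. A minimal Python sketch of the pattern (a toy class, not the MindSpore API):

```python
class ToyKernel:
    """Models the reset-then-init lifecycle of a dynamic-shape kernel."""

    def __init__(self):
        self.reset_resource()          # constructor and reset share one state definition

    def reset_resource(self):
        self.num_elements = 1          # multiplicative identity, mirrors num_elements_ = 1
        self.input_size_list = []      # size lists are rebuilt on every Init
        self.output_size_list = []

    def init(self, shape):
        for dim in shape:
            self.num_elements *= dim   # would accumulate across launches without the reset
        self.input_size_list.append(self.num_elements)

kernel = ToyKernel()
for shape in ([9], [7]):               # two launches with different input shapes
    kernel.reset_resource()
    kernel.init(shape)
    print(kernel.num_elements)          # 9, then 7 (not 63)
```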
@@ -164,7 +164,10 @@ AbstractBasePtr InferImplUnique(const AnalysisEnginePtr &, const PrimitivePtr &p
   }
   ShapeVector ids_shape = {Shape::SHP_ANY};
   ShapeVector min_shape = {1};
-  ShapeVector max_shape = shape->shape();
+  ShapeVector max_shape = shape->max_shape();
+  if (max_shape.empty()) {
+    max_shape = shape->shape();
+  }
   auto ids =
     std::make_shared<AbstractTensor>(input->element(), std::make_shared<Shape>(ids_shape, min_shape, max_shape));
   // Currently we choose the same data type as input for the idx.
@@ -174,7 +177,17 @@ AbstractBasePtr InferImplUnique(const AnalysisEnginePtr &, const PrimitivePtr &p
   if (input->element()->GetTypeTrack()->type_id() == TypeId::kNumberTypeInt64) {
     ids_idx_type = kInt64;
   }
-  auto ids_idx = std::make_shared<AbstractTensor>(ids_idx_type, shape->shape());
+  ShapeVector idx_shape = shape->shape();
+  ShapeVector idx_min_shape = shape->min_shape();
+  if (idx_min_shape.empty()) {
+    idx_min_shape = shape->shape();
+  }
+  ShapeVector idx_max_shape = shape->max_shape();
+  if (idx_max_shape.empty()) {
+    idx_max_shape = shape->shape();
+  }
+  auto ids_idx = std::make_shared<AbstractTensor>(ids_idx_type, idx_shape);
+  ids_idx->set_shape(std::make_shared<Shape>(idx_shape, idx_min_shape, idx_max_shape));
   // outputs: ids, ids_idx
   AbstractBasePtrList elements = {ids, ids_idx};
   return std::make_shared<AbstractTuple>(elements);
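For the `ids` output, the upper bound should come from `max_shape()` when the input itself is dynamic; falling back to `shape()` preserves the old behavior for static inputs, whose shape is already exact. The `idx` output mirrors the input shape, so its min/max shapes get the same empty-means-static fallback. A small Python illustration of the fallback rule (plain lists standing in for `ShapeVector`):

```python
def with_fallback(bound_shape, static_shape):
    """min_shape()/max_shape() come back empty for static inputs;
    in that case the exact static shape serves as the bound."""
    return bound_shape if bound_shape else static_shape

# dynamic input: shape [-1] with a known bound
print(with_fallback([16], [-1]))   # [16] -> use the real bound
# static input: bounds are empty, the shape is already exact
print(with_fallback([], [9]))      # [9]  -> fall back to shape()
```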
@@ -20,7 +20,7 @@ import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor
 from mindspore.ops import operations as P
+from mindspore.ops.operations import _inner_ops as inner


 class NetUnique(nn.Cell):
     def __init__(self):
@@ -32,6 +32,20 @@ class NetUnique(nn.Cell):
         return x_unique, x_idx


+class NetUniqueDynamic(nn.Cell):
+    def __init__(self):
+        super(NetUniqueDynamic, self).__init__()
+        self.convert = inner.GpuConvertToDynamicShape()
+        self.unique = P.Unique()
+        self.split = P.Split(0, 2)
+
+    def construct(self, x):
+        x_convert = self.convert(x)
+        x_unique, x_idx = self.unique(x_convert)
+        x_split = self.split(x_unique)
+        return x_unique, x_idx, x_split
+
+
 @pytest.mark.level0
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
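`GpuConvertToDynamicShape` is an inner op that marks its output as dynamically shaped, which pushes the downstream `Unique` (and the `Split` consuming its variable-length output) through the dynamic-shape path this PR enables. A minimal standalone sketch of the same wiring, assuming a GPU build of MindSpore:

```python
import numpy as np
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner

class UniqueDyn(nn.Cell):
    def __init__(self):
        super(UniqueDyn, self).__init__()
        self.convert = inner.GpuConvertToDynamicShape()  # forces the dynamic path
        self.unique = P.Unique()

    def construct(self, x):
        return self.unique(self.convert(x))

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
y, idx = UniqueDyn()(Tensor(np.array([2, 2, 1]).astype(np.float32)))
print(y.asnumpy(), idx.asnumpy())  # [1. 2.] [1 1 0]
```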
@@ -224,3 +238,32 @@ def test_unique_large_int32():
     x_unique, x_idx = net(x)
     assert (x_unique.asnumpy() == exp_output).all()
     assert (x_idx.asnumpy() == exp_idx).all()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_unique_dynamic():
+    x = Tensor(np.array([4, 5, 1, 2, 3, 3, 4, 5, 6]).astype(np.float32))
+    expt_unique = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32)
+    expt_index = np.array([3, 4, 0, 1, 2, 2, 3, 4, 5]).astype(np.int32)
+    expt_split = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
+
+    x2 = Tensor(np.array([1, 1, 4, 4, 7, 8, 8]).astype(np.float32))
+    expt_unique2 = np.array([1, 4, 7, 8]).astype(np.float32)
+    expt_index2 = np.array([0, 0, 1, 1, 2, 3, 3]).astype(np.int32)
+    expt_split2 = np.array([[1, 4], [7, 8]]).astype(np.float32)
+
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    net = NetUniqueDynamic()
+    x_unique, x_idx, x_split = net(x)
+    assert (x_unique.asnumpy() == expt_unique).all()
+    assert (x_idx.asnumpy() == expt_index).all()
+    for i, out in enumerate(x_split):
+        assert (out.asnumpy() == expt_split[i]).all()
+
+    x_unique2, x_idx2, x_split2 = net(x2)
+    assert (x_unique2.asnumpy() == expt_unique2).all()
+    assert (x_idx2.asnumpy() == expt_index2).all()
+    for i, out in enumerate(x_split2):
+        assert (out.asnumpy() == expt_split2[i]).all()
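The expected values follow sorted-unique semantics, so they can be cross-checked against NumPy independently of the GPU kernel:

```python
import numpy as np

x = np.array([4, 5, 1, 2, 3, 3, 4, 5, 6], dtype=np.float32)
uniq, inv = np.unique(x, return_inverse=True)
print(uniq)  # [1. 2. 3. 4. 5. 6.] == expt_unique
print(inv)   # [3 4 0 1 2 2 3 4 5] == expt_index
```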