| @@ -25,15 +25,18 @@ namespace kernel { | |||
| using mindspore::device::TensorArrayMgr; | |||
| using mindspore::device::TensorArrayPtr; | |||
| TensorArrayStackCpuKernelMod::TensorArrayStackCpuKernelMod() | |||
| : handle_(0), value_size_(0), ele_size_(0), type_(nullptr) { | |||
| : handle_(0), value_size_(0), ele_size_(0), type_(nullptr), is_dynamic_(true) { | |||
| ResetResource(); | |||
| } | |||
| void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto shape = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "element_shape"); | |||
| auto max_element = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "max_element"); | |||
| is_dynamic_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "is_dynamic_shape"); | |||
| auto size = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "size"); | |||
| for (auto i : shape) { | |||
| shapes_.push_back(LongToSize(i)); | |||
| } | |||
| @@ -42,7 +45,11 @@ void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { | |||
| for (auto i : shapes_) { | |||
| ele_size_ *= i; | |||
| } | |||
| value_size_ = ele_size_ * LongToSize(max_element); | |||
| if (is_dynamic_) { | |||
| value_size_ = ele_size_ * LongToSize(max_element); | |||
| } else { | |||
| value_size_ = ele_size_ * LongToSize(size); | |||
| } | |||
| output_size_list_.push_back(value_size_); | |||
| input_size_list_.push_back(sizeof(int64_t)); | |||
| } | |||
| @@ -61,6 +68,7 @@ void TensorArrayStackCpuKernelMod::ResetResource() noexcept { | |||
| handle_ = 0; | |||
| value_size_ = 0; | |||
| ele_size_ = 0; | |||
| is_dynamic_ = true; | |||
| shapes_.clear(); | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| @@ -73,6 +81,14 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, | |||
| auto out_value = GetDeviceAddress<unsigned char>(outputs, 0); | |||
| MS_EXCEPTION_IF_NULL(out_value); | |||
| MS_EXCEPTION_IF_NULL(handle_addr); | |||
| // Set out_value to zeros when TensorArray in static size. | |||
| if (!is_dynamic_) { | |||
| auto ret = memset_s(out_value, outputs[0]->size, 0, value_size_); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memset failed, errorno(" << ret << ")"; | |||
| } | |||
| } | |||
| handle_ = handle_addr[0]; | |||
| TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_); | |||
| MS_EXCEPTION_IF_NULL(tensors_); | |||
| @@ -85,10 +101,12 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, | |||
| MS_EXCEPTION_IF_NULL(src_addr); | |||
| auto ret = memcpy_s(out_value + ele_size_ * i, out_ele_size, src_addr, ele_size_); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "Memcpy failed, errorno(" << ret << ")"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memcpy failed, errorno(" << ret << ")"; | |||
| } | |||
| } | |||
| PostExecute(); | |||
| if (is_dynamic_) { | |||
| PostExecute(); | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| @@ -45,6 +45,7 @@ class TensorArrayStackCpuKernelMod : public NativeCpuKernelMod { | |||
| size_t ele_size_; | |||
| std::vector<size_t> shapes_; | |||
| TypePtr type_; | |||
| bool is_dynamic_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(TensorArrayStack, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| @@ -42,9 +42,7 @@ bool TensorArrayClearKernelMod::Launch(const std::vector<AddressPtr> &inputs, co | |||
| auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0); | |||
| MS_ERROR_IF_NULL(handle_addr); | |||
| int64_t handle = 0; | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, | |||
| cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost, | |||
| reinterpret_cast<cudaStream_t>(stream)), | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost), | |||
| "Get handle to host failed"); | |||
| TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle); | |||
| MS_ERROR_IF_NULL(tensors_); | |||
| @@ -42,9 +42,7 @@ bool TensorArrayCloseKernelMod::Launch(const std::vector<AddressPtr> &inputs, co | |||
| auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0); | |||
| MS_ERROR_IF_NULL(handle_addr); | |||
| int64_t handle = 0; | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, | |||
| cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost, | |||
| reinterpret_cast<cudaStream_t>(stream)), | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost), | |||
| "Get handle to host failed"); | |||
| GPUTensorArrayPtr tensors_ = | |||
| std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle)); | |||
| @@ -57,9 +57,7 @@ bool TensorArrayReadKernelMod::Launch(const std::vector<AddressPtr> &inputs, con | |||
| reinterpret_cast<cudaStream_t>(stream)), | |||
| "Get index to host failed"); | |||
| int64_t handle = 0; | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, | |||
| cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost, | |||
| reinterpret_cast<cudaStream_t>(stream)), | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost), | |||
| "Get handle to host failed"); | |||
| TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle); | |||
| MS_ERROR_IF_NULL(tensors_); | |||
| @@ -27,7 +27,7 @@ namespace kernel { | |||
| using mindspore::device::TensorArrayMgr; | |||
| using mindspore::device::TensorArrayPtr; | |||
| TensorArrayStackKernelMod::TensorArrayStackKernelMod() | |||
| : handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr) { | |||
| : handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr), is_dynamic_(true) { | |||
| ResetResource(); | |||
| } | |||
| @@ -36,6 +36,8 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) { | |||
| kernel_node_ = kernel_node; | |||
| auto shape = GetAttr<std::vector<int64_t>>(kernel_node, "element_shape"); | |||
| auto max_element = GetAttr<int64_t>(kernel_node, "max_element"); | |||
| is_dynamic_ = GetAttr<bool>(kernel_node, "is_dynamic_shape"); | |||
| auto size = GetAttr<int64_t>(kernel_node, "size"); | |||
| for (auto i : shape) { | |||
| shapes_.push_back(LongToSize(i)); | |||
| } | |||
| @@ -44,7 +46,14 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) { | |||
| for (auto i : shapes_) { | |||
| ele_size_ *= i; | |||
| } | |||
| value_size_ = ele_size_ * LongToSize(max_element); | |||
| if (is_dynamic_) { | |||
| value_size_ = ele_size_ * LongToSize(max_element); | |||
| } else { | |||
| if (size <= 0) { | |||
| MS_LOG(EXCEPTION) << "Size must be larger than 0 when is_dynamic_shape = false, but got " << size; | |||
| } | |||
| value_size_ = ele_size_ * LongToSize(size); | |||
| } | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -84,9 +93,14 @@ bool TensorArrayStackKernelMod::Launch(const std::vector<AddressPtr> &inputs, co | |||
| auto out_value = GetDeviceAddress<unsigned char>(outputs, 0); | |||
| MS_ERROR_IF_NULL(out_value); | |||
| MS_ERROR_IF_NULL(handle_addr); | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, | |||
| cudaMemcpyAsync(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| // Set out_value to zeros when TensorArray in static size. | |||
| if (!is_dynamic_) { | |||
| CHECK_CUDA_RET_WITH_EXCEPT( | |||
| kernel_node_, cudaMemsetAsync(out_value, 0, outputs[0]->size, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| "Cudamemset output value failed"); | |||
| } | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost), | |||
| "Get handle to host failed"); | |||
| TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_); | |||
| MS_ERROR_IF_NULL(tensors_); | |||
| @@ -46,6 +46,7 @@ class TensorArrayStackKernelMod : public NativeGpuKernelMod { | |||
| void *stream_ptr_; | |||
| std::vector<size_t> shapes_; | |||
| TypePtr type_; | |||
| bool is_dynamic_; | |||
| }; | |||
| MS_REG_GPU_KERNEL(TensorArrayStack, TensorArrayStackKernelMod) | |||
| @@ -60,9 +60,7 @@ bool TensorArrayWriteKernelMod::Launch(const std::vector<AddressPtr> &inputs, co | |||
| reinterpret_cast<cudaStream_t>(stream)), | |||
| "Get index failed"); | |||
| int64_t handle = 0; | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, | |||
| cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost, | |||
| reinterpret_cast<cudaStream_t>(stream)), | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost), | |||
| "Get handle to host failed"); | |||
| GPUTensorArrayPtr tensors_ = | |||
| std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle)); | |||
| @@ -42,19 +42,37 @@ AbstractBasePtr InferImplTensorArrayStack(const AnalysisEnginePtr &, const Primi | |||
| if (attr_dtype == nullptr) { | |||
| MS_LOG(EXCEPTION) << "No attribute [dtype] in " << op_name; | |||
| } | |||
| auto attr_is_dynamic = primitive->GetAttr("is_dynamic_shape"); | |||
| if (attr_is_dynamic == nullptr) { | |||
| MS_LOG(EXCEPTION) << "No attribute [is_dynamic_shape] in " << op_name; | |||
| } | |||
| auto attr_size = primitive->GetAttr("size"); | |||
| if (attr_size == nullptr) { | |||
| MS_LOG(EXCEPTION) << "No attribute [size] in " << op_name; | |||
| } | |||
| auto is_dynamic = GetValue<bool>(attr_is_dynamic); | |||
| auto size = GetValue<int64_t>(attr_size); | |||
| auto ele_shape = GetValue<std::vector<int64_t>>(attr_shape); | |||
| auto type = GetValue<TypePtr>(attr_dtype); | |||
| primitive->set_attr("max_element", MakeValue(kMaxElement)); | |||
| auto max_shape_ = ele_shape; | |||
| auto min_shape_ = ele_shape; | |||
| auto out_shape_ = ele_shape; | |||
| (void)max_shape_.insert(max_shape_.begin(), kMaxElement); | |||
| (void)min_shape_.insert(min_shape_.begin(), 1); | |||
| (void)out_shape_.insert(out_shape_.begin(), -1); | |||
| ShapeVector out_shape = out_shape_; | |||
| ShapeVector min_shape = min_shape_; | |||
| ShapeVector max_shape = max_shape_; | |||
| auto output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape)); | |||
| std::shared_ptr<mindspore::abstract::AbstractTensor> output; | |||
| if (is_dynamic) { | |||
| auto max_shape_ = ele_shape; | |||
| auto min_shape_ = ele_shape; | |||
| auto out_shape_ = ele_shape; | |||
| (void)max_shape_.insert(max_shape_.begin(), kMaxElement); | |||
| (void)min_shape_.insert(min_shape_.begin(), 1); | |||
| (void)out_shape_.insert(out_shape_.begin(), -1); | |||
| ShapeVector out_shape = out_shape_; | |||
| ShapeVector min_shape = min_shape_; | |||
| ShapeVector max_shape = max_shape_; | |||
| output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape)); | |||
| } else { | |||
| auto out_shape_ = ele_shape; | |||
| (void)out_shape_.insert(out_shape_.begin(), size); | |||
| ShapeVector out_shape = out_shape_; | |||
| output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape)); | |||
| } | |||
| return output; | |||
| } | |||
| } // namespace abstract | |||
| @@ -66,7 +66,7 @@ class TensorArray(Cell): | |||
| self.tensor_array_read = ta.TensorArrayRead(dtype, element_shape) | |||
| self.tensor_array_close = ta.TensorArrayClose() | |||
| self.tensor_array_clear = ta.TensorArrayClear() | |||
| self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape) | |||
| self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape, dynamic_size, size) | |||
| self.tensor_array_size = ta.TensorArraySize() | |||
| def write(self, index, value): | |||
| @@ -250,12 +250,13 @@ class TensorArrayStack(Primitive): | |||
| [1 2] | |||
| """ | |||
| @prim_attr_register | |||
| def __init__(self, dtype, element_shape): | |||
| def __init__(self, dtype, element_shape, dynamic_size, size): | |||
| """Initialize TensorArrayStack""" | |||
| self.init_prim_io_names(inputs=[''], outputs=['output']) | |||
| self.add_prim_attr('dtype', dtype) | |||
| self.add_prim_attr('element_shape', element_shape) | |||
| self.add_prim_attr('is_dynamic_shape', True) | |||
| self.add_prim_attr('is_dynamic_shape', dynamic_size) | |||
| self.add_prim_attr('size', size) | |||
| self.add_prim_attr('side_effect_mem', True) | |||
| @@ -21,9 +21,9 @@ import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| class TensorArrayNet(nn.Cell): | |||
| def __init__(self, dtype, element_shape): | |||
| def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0): | |||
| super(TensorArrayNet, self).__init__() | |||
| self.ta = nn.TensorArray(dtype, element_shape) | |||
| self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size) | |||
| def construct(self, index, value): | |||
| self.ta.write(index, value) | |||
| @@ -92,3 +92,22 @@ def test_tensorarray(): | |||
| assert np.allclose(v.asnumpy(), expect_s[1]) | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| ta.close() | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_static_tensorarray(): | |||
| """ | |||
| Feature: TensorArray cpu TEST. | |||
| Description: Test the static tensorarray. | |||
| Expectation: success. | |||
| """ | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||
| index = Tensor(0, mindspore.int64) | |||
| value = Tensor(5, mindspore.int64) | |||
| ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12) | |||
| v, s = ta(index, value) | |||
| expect_v = 5 | |||
| expect_s = [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| assert np.allclose(v.asnumpy(), expect_v) | |||
| @@ -21,9 +21,9 @@ import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| class TensorArrayNet(nn.Cell): | |||
| def __init__(self, dtype, element_shape): | |||
| def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0): | |||
| super(TensorArrayNet, self).__init__() | |||
| self.ta = nn.TensorArray(dtype, element_shape) | |||
| self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size) | |||
| def construct(self, index, value): | |||
| for i in range(2): | |||
| @@ -59,43 +59,62 @@ def test_tensorarray(): | |||
| assert np.allclose(v.asnumpy(), expect_v) | |||
| context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") | |||
| ta = nn.TensorArray(mindspore.int64, ()) | |||
| tb = nn.TensorArray(mindspore.int64, ()) | |||
| for i in range(5): | |||
| ta.write(i, 99) | |||
| v = ta.read(0) | |||
| s = ta.stack() | |||
| tb.write(i, 99) | |||
| v = tb.read(0) | |||
| s = tb.stack() | |||
| expect_v = 99 | |||
| expect_s = [99, 99, 99, 99, 99] | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| assert np.allclose(v.asnumpy(), expect_v) | |||
| ta_size = ta.size() | |||
| assert np.allclose(ta_size.asnumpy(), 5) | |||
| ta.clear() | |||
| ta_size = ta.size() | |||
| assert np.allclose(ta_size.asnumpy(), 0) | |||
| ta.write(0, 88) | |||
| v = ta.read(0) | |||
| s = ta.stack() | |||
| ta.close() | |||
| tb_size = tb.size() | |||
| assert np.allclose(tb_size.asnumpy(), 5) | |||
| tb.clear() | |||
| tb_size = tb.size() | |||
| assert np.allclose(tb_size.asnumpy(), 0) | |||
| tb.write(0, 88) | |||
| v = tb.read(0) | |||
| s = tb.stack() | |||
| tb.close() | |||
| expect_v = 88 | |||
| expect_s = [88] | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| assert np.allclose(v.asnumpy(), expect_v) | |||
| ta = nn.TensorArray(mindspore.float32, ()) | |||
| ta.write(5, 1.) | |||
| s = ta.stack() | |||
| tc = nn.TensorArray(mindspore.float32, ()) | |||
| tc.write(5, 1.) | |||
| s = tc.stack() | |||
| expect_s = [0., 0., 0., 0., 0., 1.] | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| ta.write(2, 1.) | |||
| s = ta.stack() | |||
| tc.write(2, 1.) | |||
| s = tc.stack() | |||
| expect_s = [0., 0., 1., 0., 0., 1.] | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| ta.close() | |||
| ta = nn.TensorArray(mindspore.bool_, ()) | |||
| ta.write(1, Tensor(True, mindspore.bool_)) | |||
| s = ta.stack() | |||
| v = ta.read(1) | |||
| tc.close() | |||
| td = nn.TensorArray(mindspore.bool_, ()) | |||
| td.write(1, Tensor(True, mindspore.bool_)) | |||
| s = td.stack() | |||
| v = td.read(1) | |||
| expect_s = [False, True] | |||
| assert np.allclose(v.asnumpy(), expect_s[1]) | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| ta.close() | |||
| td.close() | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_static_tensorarray(): | |||
| """ | |||
| Feature: TensorArray gpu TEST. | |||
| Description: Test the static tensorarray. | |||
| Expectation: success. | |||
| """ | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="GPU") | |||
| index = Tensor(0, mindspore.int64) | |||
| value = Tensor(5, mindspore.int64) | |||
| ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12) | |||
| v, s = ta(index, value) | |||
| expect_v = 24 | |||
| expect_s = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 0] | |||
| assert np.allclose(s.asnumpy(), expect_s) | |||
| assert np.allclose(v.asnumpy(), expect_v) | |||