| @@ -25,7 +25,7 @@ namespace kernel { | |||
| template <typename T> | |||
| class OnesLikeGpuKernelMod : public NativeGpuKernelMod { | |||
| public: | |||
| OnesLikeGpuKernelMod() : input_size_(0), output_size_(0), is_null_input_(false) {} | |||
| OnesLikeGpuKernelMod() : input_size_(0), output_size_(0), is_null_input_(false) { ResetResource(); } | |||
| ~OnesLikeGpuKernelMod() override = default; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| @@ -68,6 +68,15 @@ class OnesLikeGpuKernelMod : public NativeGpuKernelMod { | |||
| return true; | |||
| } | |||
| void ResetResource() noexcept override { | |||
| input_size_ = 0; | |||
| output_size_ = 0; | |||
| is_null_input_ = false; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(input_size_); | |||
| @@ -44,7 +44,9 @@ class BiasAddGradGpuKernelMod : public NativeGpuKernelMod { | |||
| cudnn_compute_format_(CUDNN_TENSOR_NCHW), | |||
| dy_desc_(nullptr), | |||
| db_desc_(nullptr), | |||
| op_desc_(nullptr) {} | |||
| op_desc_(nullptr) { | |||
| ResetResource(); | |||
| } | |||
| ~BiasAddGradGpuKernelMod() override { DestroyResource(); } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| @@ -139,6 +141,29 @@ class BiasAddGradGpuKernelMod : public NativeGpuKernelMod { | |||
| } | |||
| } | |||
| void ResetResource() noexcept override { | |||
| same_dims_ = true; | |||
| is_null_input_ = false; | |||
| kernel_name_ = "BiasAddGrad"; | |||
| use_cudnn_ = false; | |||
| dy_num_ = 1; | |||
| db_num_ = 1; | |||
| num_dims_ = 0; | |||
| bias_size_ = 0; | |||
| dy_shape_.clear(); | |||
| db_shape_.clear(); | |||
| data_format_ = kOpFormat_NCHW; | |||
| cudnn_handle_ = nullptr; | |||
| cudnn_data_type_ = CUDNN_DATA_FLOAT; | |||
| cudnn_compute_format_ = CUDNN_TENSOR_NCHW; | |||
| dy_desc_ = nullptr; | |||
| db_desc_ = nullptr; | |||
| op_desc_ = nullptr; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| } | |||
| protected: | |||
| void MethodSelection() { | |||
| // opt implementation can only process num_dims_ <= 4 | |||
| @@ -83,7 +83,9 @@ class CtcLossGpuKernelMod : public NativeGpuKernelMod { | |||
| batch_label(0), | |||
| label_value_with_blank(nullptr), | |||
| log_alpha_b(nullptr), | |||
| log_beta_b(nullptr) {} | |||
| log_beta_b(nullptr) { | |||
| ResetResource(); | |||
| } | |||
| ~CtcLossGpuKernelMod() override = default; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| @@ -145,6 +147,44 @@ class CtcLossGpuKernelMod : public NativeGpuKernelMod { | |||
| return true; | |||
| } | |||
| void ResetResource() noexcept override { | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| label_indice_size_ = 0; | |||
| label_size_ = 0; | |||
| sequence_lengths_size_ = 0; | |||
| preprocess_collapse_repeated_ = false; | |||
| ctc_merge_repeated_ = true; | |||
| ignore_longer_outputs_than_inputs_ = false; | |||
| is_null_input_ = false; | |||
| kernel_name_ = "CTCLoss"; | |||
| probs = nullptr; | |||
| label_indices = nullptr; | |||
| label_values = nullptr; | |||
| sequence_length = nullptr; | |||
| costs = nullptr; | |||
| grads = nullptr; | |||
| softmax_probs = nullptr; | |||
| cum_labels_length = nullptr; | |||
| label_squence_length = nullptr; | |||
| label_value_sp = nullptr; | |||
| label_value_pcr = nullptr; | |||
| prob_num = nullptr; | |||
| precum_labels_length = nullptr; | |||
| max_labels_length = nullptr; | |||
| numclass = 0; | |||
| batch = 0; | |||
| max_time = 0; | |||
| max_sequence = 0; | |||
| max_labels_length_host = 0; | |||
| batch_label = 0; | |||
| label_value_with_blank = nullptr; | |||
| log_alpha_b = nullptr; | |||
| log_beta_b = nullptr; | |||
| workspace_size_list_.clear(); | |||
| } | |||
| protected: | |||
| void LaunchInit(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| @@ -27,7 +27,7 @@ namespace kernel { | |||
| template <typename T> | |||
| class LayerNormGpuKernelMod : public NativeGpuKernelMod { | |||
| public: | |||
| LayerNormGpuKernelMod() : input_row_(1), input_col_(1), param_dim_(1), is_null_input_(false) {} | |||
| LayerNormGpuKernelMod() : input_row_(1), input_col_(1), param_dim_(1), is_null_input_(false) { ResetResource(); } | |||
| ~LayerNormGpuKernelMod() override = default; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| @@ -87,6 +87,16 @@ class LayerNormGpuKernelMod : public NativeGpuKernelMod { | |||
| return true; | |||
| } | |||
| void ResetResource() noexcept override { | |||
| input_row_ = 1; | |||
| input_col_ = 1; | |||
| param_dim_ = 1; | |||
| is_null_input_ = false; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(input_row_ * input_col_ * sizeof(T)); | |||
| @@ -47,7 +47,9 @@ class SoftmaxGpuKernelMod : public NativeGpuKernelMod { | |||
| batch_size_(0), | |||
| channel_size_(0), | |||
| height_(0), | |||
| width_(0) {} | |||
| width_(0) { | |||
| ResetResource(); | |||
| } | |||
| ~SoftmaxGpuKernelMod() override { DestroyResource(); } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| @@ -155,6 +157,33 @@ class SoftmaxGpuKernelMod : public NativeGpuKernelMod { | |||
| "destroy input_descriptor failed"); | |||
| } | |||
| void ResetResource() noexcept override { | |||
| cudnn_handle_ = nullptr; | |||
| input_descriptor_ = nullptr; | |||
| output_descriptor_ = nullptr; | |||
| algo_ = CUDNN_SOFTMAX_ACCURATE; | |||
| mode_ = CUDNN_SOFTMAX_MODE_INSTANCE; | |||
| cudnn_data_type_ = CUDNN_DATA_FLOAT; | |||
| is_null_input_ = false; | |||
| kernel_name_ = "Softmax"; | |||
| input_size_ = 0; | |||
| output_size_ = 0; | |||
| workspace_size_ = 0; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| input_shape_.clear(); | |||
| transpose_shape_.clear(); | |||
| transpose_axis_.clear(); | |||
| need_transpose_ = false; | |||
| shape_size_ = 0; | |||
| batch_size_ = 0; | |||
| channel_size_ = 0; | |||
| height_ = 0; | |||
| width_ = 0; | |||
| workspace_size_list_.clear(); | |||
| } | |||
| protected: | |||
| void InitResource() override { | |||
| cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle(); | |||