|
|
|
@@ -35,7 +35,8 @@ class BiasAddGpuKernel : public GpuKernel { |
|
|
|
cudnn_data_type_(CUDNN_DATA_FLOAT), |
|
|
|
x_desc_(nullptr), |
|
|
|
b_desc_(nullptr), |
|
|
|
op_desc_(nullptr) {} |
|
|
|
op_desc_(nullptr), |
|
|
|
is_null_input_(false) {} |
|
|
|
~BiasAddGpuKernel() override { DestroyResource(); } |
|
|
|
|
|
|
|
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } |
|
|
|
@@ -45,6 +46,10 @@ class BiasAddGpuKernel : public GpuKernel { |
|
|
|
const std::vector<AddressPtr> &outputs, void *stream_ptr) override { |
|
|
|
VARIABLE_NOT_USED(workspace); |
|
|
|
VARIABLE_NOT_USED(stream_ptr); |
|
|
|
if (is_null_input_) { |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
T *x_addr = GetDeviceAddress<T>(inputs, 0); |
|
|
|
T *b_addr = GetDeviceAddress<T>(inputs, 1); |
|
|
|
T *output_addr = GetDeviceAddress<T>(outputs, 0); |
|
|
|
@@ -65,6 +70,13 @@ class BiasAddGpuKernel : public GpuKernel { |
|
|
|
cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; |
|
|
|
auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); |
|
|
|
auto num_dims = x_shape.size(); |
|
|
|
is_null_input_ = CHECK_NULL_INPUT(x_shape); |
|
|
|
if (is_null_input_) { |
|
|
|
MS_LOG(WARNING) << "input is null"; |
|
|
|
InitSizeLists(); |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
if (num_dims < 2) { |
|
|
|
MS_LOG(EXCEPTION) << "input dims must be at least 2, but got " << num_dims; |
|
|
|
} |
|
|
|
@@ -126,6 +138,7 @@ class BiasAddGpuKernel : public GpuKernel { |
|
|
|
cudnnTensorDescriptor_t x_desc_; |
|
|
|
cudnnTensorDescriptor_t b_desc_; |
|
|
|
cudnnOpTensorDescriptor_t op_desc_; |
|
|
|
bool is_null_input_; |
|
|
|
std::vector<size_t> input_size_list_; |
|
|
|
std::vector<size_t> output_size_list_; |
|
|
|
std::vector<size_t> workspace_size_list_; |
|
|
|
|