Browse Source

Add int64 -> fp16 cast support and update conv pad handling

tags/v0.6.0-beta
VectorSL 5 years ago
parent
commit
90f15df037
6 changed files with 38 additions and 10 deletions
  1. +2
    -0
      mindspore/_akg/gpu/cast.py
  2. +5
    -3
      mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
  3. +5
    -3
      mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
  4. +5
    -3
      mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
  5. +3
    -0
      mindspore/ops/_op_impl/akg/gpu/cast.py
  6. +18
    -1
      tests/st/ops/gpu/test_cast_op.py

+ 2
- 0
mindspore/_akg/gpu/cast.py View File

@@ -20,6 +20,8 @@ from _akg.topi.generic import schedule_elemwise

def Cast(x, dst_type):
    """Cast ``x`` to ``dst_type``.

    An int64 input headed for float16 is converted in two steps: it is first
    cast to float32, and that intermediate result is then cast to float16.
    Every other source/destination pair is cast directly.

    Args:
        x: value to convert; only its ``dtype`` attribute is inspected here.
        dst_type: name of the destination data type (compared against
            the string "float16").

    Returns:
        Whatever ``cast.cast`` returns for the final conversion.
    """
    # NOTE(review): the float32 hop presumably exists because a direct
    # int64 -> float16 cast is not available in the backend; confirm.
    via_float32 = x.dtype == "int64" and dst_type == "float16"
    source = cast.cast(x, "float32") if via_float32 else x
    return cast.cast(source, dst_type)




+ 5
- 3
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h View File

@@ -109,12 +109,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
Set4DDesc(in_shape, filter_shape, output_shape);
group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_width_ = pad_height_;
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t input_descriptor_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(in_shape, kernel_node);
input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
} else {


+ 5
- 3
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h View File

@@ -113,12 +113,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");

pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_width_ = pad_height_;
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t x_desc_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(in_shape, kernel_node);
x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_;
} else {


+ 5
- 3
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h View File

@@ -114,12 +114,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");

pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_width_ = pad_height_;
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t dx_desc_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(input_shape, kernel_node);
dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_;
} else {


+ 3
- 0
mindspore/ops/_op_impl/akg/gpu/cast.py View File

@@ -50,6 +50,9 @@ cast_op_info = AkgGpuRegOp("Cast") \
.dtype_format(DataType.I16_Default, DataType.I32_Default) \
.dtype_format(DataType.I16_Default, DataType.I64_Default) \
.dtype_format(DataType.I64_Default, DataType.F64_Default) \
.dtype_format(DataType.I64_Default, DataType.F32_Default) \
.dtype_format(DataType.I64_Default, DataType.F16_Default) \
.dtype_format(DataType.I64_Default, DataType.I32_Default) \
.dtype_format(DataType.I16_Default, DataType.F32_Default) \
.dtype_format(DataType.I16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_Default, DataType.I32_Default) \


+ 18
- 1
tests/st/ops/gpu/test_cast_op.py View File

@@ -92,7 +92,7 @@ def test_cast2():
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_cast3():
x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16))
x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
t0 = mstype.int32
x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float32))
t1 = mstype.int32
@@ -342,3 +342,20 @@ def test_cast17():
assert type0 == 'float32'
type1 = output[1].asnumpy().dtype
assert type1 == 'float16'

@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_cast18():
    """Check the output dtypes when int64 inputs go through Net on GPU.

    Two identical int64 tensors of shape (4, 3, 2) are fed to ``Net`` built
    with float32 and float16 as its two target types; the first output must
    be float32 and the second float16. Only dtypes are checked, not values.
    """
    def make_int64_input():
        # Values 0..23 laid out as a (4, 3, 2) int64 tensor.
        return Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))

    x0, t0 = make_int64_input(), mstype.float32
    x1, t1 = make_int64_input(), mstype.float16

    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    output = Net(t0, t1)(x0, x1)

    # Output i is expected to carry the dtype requested for input i.
    for index, expected_dtype in enumerate(('float32', 'float16')):
        assert output[index].asnumpy().dtype == expected_dtype

Loading…
Cancel
Save