Browse Source

changed cast to round to zero if casting from float to integral

tags/v1.1.0
Peilin Wang 5 years ago
parent
commit
299509dbfc
2 changed files with 9 additions and 9 deletions
  1. +8
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cast_impl.cu
  2. +1
    -1
      tests/st/ops/gpu/test_cast_op.py

+ 8
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cast_impl.cu View File

@@ -28,35 +28,35 @@ __device__ __forceinline__ void CastBase(const S *input_addr, T *output_addr) {

// half --> integer
__device__ __forceinline__ void CastBase(const half *input_addr, uint64_t *output_addr) {
*output_addr = __half2ull_rd((*input_addr));
*output_addr = __half2ull_rz((*input_addr));
}

__device__ __forceinline__ void CastBase(const half *input_addr, int64_t *output_addr) {
*output_addr = __half2ll_rd((*input_addr));
*output_addr = __half2ll_rz((*input_addr));
}

__device__ __forceinline__ void CastBase(const half *input_addr, uint32_t *output_addr) {
*output_addr = __half2uint_rd((*input_addr));
*output_addr = __half2uint_rz((*input_addr));
}

__device__ __forceinline__ void CastBase(const half *input_addr, int32_t *output_addr) {
*output_addr = __half2int_rd((*input_addr));
*output_addr = __half2int_rz((*input_addr));
}

__device__ __forceinline__ void CastBase(const half *input_addr, uint16_t *output_addr) {
*output_addr = __half2ushort_rd((*input_addr));
*output_addr = __half2ushort_rz((*input_addr));
}

__device__ __forceinline__ void CastBase(const half *input_addr, int16_t *output_addr) {
*output_addr = __half2short_rd((*input_addr));
*output_addr = __half2short_rz((*input_addr));
}

__device__ __forceinline__ void CastBase(const half *input_addr, uint8_t *output_addr) {
*output_addr = static_cast<uint8_t>(__half2ushort_rd((*input_addr)));
*output_addr = static_cast<uint8_t>(__half2ushort_rz((*input_addr)));
}

__device__ __forceinline__ void CastBase(const half *input_addr, int8_t *output_addr) {
*output_addr = static_cast<int8_t>(__half2short_rd((*input_addr)));
*output_addr = static_cast<int8_t>(__half2short_rz((*input_addr)));
}

// integer --> half


+ 1
- 1
tests/st/ops/gpu/test_cast_op.py View File

@@ -604,7 +604,7 @@ def test_cast31():
@pytest.mark.env_onecard
def test_cast32():
np.random.seed(10)
x = np.random.rand(*(3, 2)).astype(np.float16)
x = np.random.uniform(-5, 5, (3, 2)).astype(np.float16)
x0 = Tensor(x)
t0 = mstype.int32
x1 = Tensor(x)


Loading…
Cancel
Save