| @@ -96,10 +96,10 @@ class ScatterAddKernel : public GpuKernel { | |||||
| } | } | ||||
| private: | private: | ||||
| int input_size_; | |||||
| int inner_size_; | |||||
| int indices_size_; | |||||
| int updates_size_; | |||||
| size_t input_size_; | |||||
| size_t inner_size_; | |||||
| size_t indices_size_; | |||||
| size_t updates_size_; | |||||
| std::vector<size_t> input_size_list_; | std::vector<size_t> input_size_list_; | ||||
| std::vector<size_t> output_size_list_; | std::vector<size_t> output_size_list_; | ||||
| std::vector<size_t> workspace_size_list_; | std::vector<size_t> workspace_size_list_; | ||||
| @@ -96,10 +96,10 @@ class ScatterUpdateKernel : public GpuKernel { | |||||
| } | } | ||||
| private: | private: | ||||
| int input_size_; | |||||
| int inner_size_; | |||||
| int indices_size_; | |||||
| int updates_size_; | |||||
| size_t input_size_; | |||||
| size_t inner_size_; | |||||
| size_t indices_size_; | |||||
| size_t updates_size_; | |||||
| std::vector<size_t> input_size_list_; | std::vector<size_t> input_size_list_; | ||||
| std::vector<size_t> output_size_list_; | std::vector<size_t> output_size_list_; | ||||
| std::vector<size_t> workspace_size_list_; | std::vector<size_t> workspace_size_list_; | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh" | #include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh" | ||||
| template <typename T> | template <typename T> | ||||
| __global__ void ScatterAdd(const int inner_size, const int updates_size, const int *indices, const T *updates, | |||||
| __global__ void ScatterAdd(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates, | |||||
| T *input) { | T *input) { | ||||
| for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) { | for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) { | ||||
| const size_t index = pos / inner_size; | const size_t index = pos / inner_size; | ||||
| @@ -29,16 +29,16 @@ __global__ void ScatterAdd(const int inner_size, const int updates_size, const i | |||||
| } | } | ||||
| template <typename T> | template <typename T> | ||||
| void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input, | |||||
| void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input, | |||||
| cudaStream_t cuda_stream) { | cudaStream_t cuda_stream) { | ||||
| const int updates_size = inner_size * indices_size; | |||||
| const size_t updates_size = inner_size * indices_size; | |||||
| ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates, | ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates, | ||||
| input); | input); | ||||
| } | } | ||||
| template void CalScatterAdd<float>(const int &inner_size, const int &indices_size, const int *indices, | |||||
| template void CalScatterAdd<float>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const float *updates, float *input, cudaStream_t cuda_stream); | const float *updates, float *input, cudaStream_t cuda_stream); | ||||
| template void CalScatterAdd<half>(const int &inner_size, const int &indices_size, const int *indices, | |||||
| template void CalScatterAdd<half>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const half *updates, half *input, cudaStream_t cuda_stream); | const half *updates, half *input, cudaStream_t cuda_stream); | ||||
| template void CalScatterAdd<int>(const int &inner_size, const int &indices_size, const int *indices, const int *updates, | |||||
| int *input, cudaStream_t cuda_stream); | |||||
| template void CalScatterAdd<int>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const int *updates, int *input, cudaStream_t cuda_stream); | |||||
| @@ -20,7 +20,7 @@ | |||||
| #include "runtime/device/gpu/cuda_common.h" | #include "runtime/device/gpu/cuda_common.h" | ||||
| template <typename T> | template <typename T> | ||||
| void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input, | |||||
| void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input, | |||||
| cudaStream_t cuda_stream); | cudaStream_t cuda_stream); | ||||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_ | #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_ | ||||
| @@ -17,32 +17,32 @@ | |||||
| #include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh" | #include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh" | ||||
| template <typename T> | template <typename T> | ||||
| __global__ void ScatterUpdate(const int inner_size, const int updates_size, const int *indices, const T *updates, | |||||
| __global__ void ScatterUpdate(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates, | |||||
| T *input) { | T *input) { | ||||
| for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) { | |||||
| const int index = pos / inner_size; | |||||
| const int offset = pos % inner_size; | |||||
| const int current_pos = indices[index] * inner_size + offset; | |||||
| for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) { | |||||
| const size_t index = pos / inner_size; | |||||
| const size_t offset = pos % inner_size; | |||||
| const size_t current_pos = indices[index] * inner_size + offset; | |||||
| input[current_pos] = updates[pos]; | input[current_pos] = updates[pos]; | ||||
| } | } | ||||
| } | } | ||||
| template <typename T> | template <typename T> | ||||
| void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input, | |||||
| cudaStream_t cuda_stream) { | |||||
| const int updates_size = inner_size * indices_size; | |||||
| void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, | |||||
| T *input, cudaStream_t cuda_stream) { | |||||
| const size_t updates_size = inner_size * indices_size; | |||||
| ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates, | ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates, | ||||
| input); | input); | ||||
| } | } | ||||
| template void CalScatterUpdate<float>(const int &inner_size, const int &indices_size, const int *indices, | |||||
| template void CalScatterUpdate<float>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const float *updates, float *input, cudaStream_t cuda_stream); | const float *updates, float *input, cudaStream_t cuda_stream); | ||||
| template void CalScatterUpdate<half>(const int &inner_size, const int &indices_size, const int *indices, | |||||
| template void CalScatterUpdate<half>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const half *updates, half *input, cudaStream_t cuda_stream); | const half *updates, half *input, cudaStream_t cuda_stream); | ||||
| template void CalScatterUpdate<int>(const int &inner_size, const int &indices_size, const int *indices, | |||||
| template void CalScatterUpdate<int>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const int *updates, int *input, cudaStream_t cuda_stream); | const int *updates, int *input, cudaStream_t cuda_stream); | ||||
| template void CalScatterUpdate<unsigned char>(const int &inner_size, const int &indices_size, const int *indices, | |||||
| template void CalScatterUpdate<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const unsigned char *updates, unsigned char *input, | const unsigned char *updates, unsigned char *input, | ||||
| cudaStream_t cuda_stream); | cudaStream_t cuda_stream); | ||||
| template void CalScatterUpdate<int8_t>(const int &inner_size, const int &indices_size, const int *indices, | |||||
| const int8_t *updates, int8_t *input, cudaStream_t cuda_stream); | |||||
| template void CalScatterUpdate<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices, | |||||
| const int8_t *updates, int8_t *input, cudaStream_t cuda_stream); | |||||
| @@ -20,7 +20,7 @@ | |||||
| #include "runtime/device/gpu/cuda_common.h" | #include "runtime/device/gpu/cuda_common.h" | ||||
| template <typename T> | template <typename T> | ||||
| void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input, | |||||
| cudaStream_t cuda_stream); | |||||
| void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, | |||||
| T *input, cudaStream_t cuda_stream); | |||||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_ | #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_ | ||||