@@ -69,7 +69,10 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
       memcpy_flag_ = true;
     }
-    ScatterNd(indices, update, output, block_size_, input_size_, output_size_, indices_dim_0_, indices_dim_1_,
+    const size_t input_size = input_size_ / sizeof(T);
+    const size_t output_size = output_size_ / sizeof(T);
+    ScatterNd(indices, update, output, block_size_, input_size, output_size, indices_dim_0_, indices_dim_1_,
               indices_stride_, work_shape_, reinterpret_cast<cudaStream_t>(stream_ptr));
     return true;
   }
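Note on the new locals: the division by sizeof(T) implies input_size_ and output_size_ are tracked in bytes, while the CUDA-side ScatterNd indexes elements of T, so the call now passes element counts. A trivial sketch of that conversion (hypothetical helper, not part of the kernel class):

```cpp
// Sketch only: byte count -> element count, as implied by the division above.
#include <cstddef>

template <typename T>
constexpr size_t ElementCount(size_t byte_count) {
  return byte_count / sizeof(T);  // e.g. ElementCount<float>(1024) == 256
}
```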
@@ -138,7 +141,7 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
     // calculate indices dim 0/1
     indices_dim_0_ = indices_shapes_[0];
-    indices_dim_1_ = indices_shapes_[1];
+    indices_dim_1_ = indices_shapes_[indices_shapes_.size() - 1];
     // calculate block_size
     for (size_t i = indices_dim_1_; i < output_shapes_.size(); i++) {
@@ -146,10 +149,7 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
     }
     // calculate indices_stride
-    for (size_t i = 0; i < indices_dim_1_; i++) {
-      vec_indices_stride_.push_back(0);
-    }
+    vec_indices_stride_.resize(indices_dim_1_, 0);
     vec_indices_stride_[indices_dim_1_ - 1] = block_size_;
     for (size_t i = indices_dim_1_ - 1; i > 0; --i) {
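For context, the vector built here holds row-major strides over the leading indices_dim_1_ dimensions of the output, scaled by block_size_, so each index tuple maps to a flat element offset. A standalone host-side sketch of the same setup (free-standing names, and assuming the usual relation stride[i-1] = stride[i] * shape[i] for the loop body that continues past the hunk):

```cpp
// Standalone sketch of the stride setup above for a row-major output.
// output_shape and indices_dim_1 stand in for the class members; assumes indices_dim_1 >= 1.
#include <cstddef>
#include <vector>

std::vector<size_t> MakeIndicesStride(const std::vector<size_t> &output_shape, size_t indices_dim_1) {
  // block_size = number of elements covered by one index tuple (the trailing dims).
  size_t block_size = 1;
  for (size_t i = indices_dim_1; i < output_shape.size(); ++i) {
    block_size *= output_shape[i];
  }
  std::vector<size_t> stride(indices_dim_1, 0);
  stride[indices_dim_1 - 1] = block_size;
  for (size_t i = indices_dim_1 - 1; i > 0; --i) {
    stride[i - 1] = stride[i] * output_shape[i];  // row-major: outer stride = inner stride * inner extent
  }
  return stride;
}
// Example: output_shape = {4, 5, 6}, indices_dim_1 = 2 -> block_size = 6, stride = {30, 6};
// the index tuple (2, 3) then starts at element offset 2 * 30 + 3 * 6 = 78.
```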
@@ -50,12 +50,12 @@ __global__ void IOUKernel(const size_t size, const T *box1, const T *box2, T *io
     T area1 = (location_coordinate[0][2] - location_coordinate[0][0] + 1) * (location_coordinate[0][3] -
               location_coordinate[0][1] + 1);
+    T area2 = (location_coordinate[1][2] - location_coordinate[1][0] + 1) * (location_coordinate[1][3] -
+              location_coordinate[1][1] + 1);
     if (mode == 0) {
-      T area2 = (location_coordinate[1][2] - location_coordinate[1][0] + 1) * (location_coordinate[1][3] -
-                location_coordinate[1][1] + 1);
       iou_results[i] = overlaps / (area1 + area2 - overlaps + epsilon);
     } else {
-      iou_results[i] = overlaps / (area1 + epsilon);
+      iou_results[i] = overlaps / (area2 + epsilon);
     }
   }
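Hoisting area2 out of the if block is what makes the second change possible: mode 0 computes intersection-over-union of the two boxes, while the other mode divides by the second box's area only, so it needs area2 in scope. A plain-float sketch of the two ratios (the function name is made up; epsilon is the same small guard against division by zero):

```cpp
// Sketch of the two ratios in the kernel above, on plain floats for clarity.
float OverlapRatio(float overlaps, float area1, float area2, int mode, float epsilon) {
  if (mode == 0) {
    // mode 0: intersection over union of both boxes.
    return overlaps / (area1 + area2 - overlaps + epsilon);
  }
  // other mode: intersection over the area of the second box only,
  // which is why area2 must now be computed before the branch.
  return overlaps / (area2 + epsilon);
}
```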
@@ -15,7 +15,9 @@
  */
 #include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
 #include "runtime/device/gpu/cuda_common.h"
 template <typename T, typename S>
 __global__ void ScatterNdKernel(S *indices, T *update, T *output, const size_t block_size, const size_t input_size,
                                 const size_t output_size, const size_t indices_dim_0, const size_t indices_dim_1,
@@ -39,7 +41,7 @@ __global__ void ScatterNdKernel(S *indices, T *update, T *output, const size_t b
     out_bound |= write_index >= output_size;
     if (!out_bound) {
-      output[write_index] = update[read_index];
+      ms_atomic_add(&output[write_index], update[read_index]);
     }
   }
 }
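Replacing the plain store with ms_atomic_add (from the newly included util.cuh) matters when indices contains duplicates: two threads can compute the same write_index, and ScatterNd accumulates colliding updates instead of keeping whichever write lands last. A minimal standalone illustration using CUDA's built-in atomicAdd rather than the MindSpore wrapper:

```cuda
// Minimal sketch: duplicate destinations must accumulate, so the write is an atomic add.
// A plain `out[dst[i]] = val[i]` would race whenever two threads share the same dst[i].
__global__ void AccumulateByIndex(const int *dst, const float *val, float *out, size_t n) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
    atomicAdd(&out[dst[i]], val[i]);  // hardware atomic; ms_atomic_add wraps this per element type
  }
}
```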
@@ -48,7 +50,7 @@ template <typename T, typename S>
 void ScatterNd(S *indices, T *update, T *output, const size_t &block_size, const size_t &input_size,
                const size_t &output_size, const size_t &indices_dim_0, const size_t &indices_dim_1, S *indices_stride,
                S *work_shape, cudaStream_t stream) {
-  ScatterNdKernel<<<GET_BLOCKS(input_size), GET_THREADS, 0, stream>>>(indices, update, output, block_size, input_size,
+  ScatterNdKernel<<<GET_BLOCKS(output_size), GET_THREADS, 0, stream>>>(indices, update, output, block_size, input_size,
                                                                       output_size, indices_dim_0, indices_dim_1,
                                                                       indices_stride, work_shape);
   return;
@@ -22,12 +22,12 @@ __global__ void SGDKernel(const int size, const T dampening, const T weight_deca
                           const T *momentum, const T *lr, T *param, T *accum, T *stat) {
   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {
     T grad_new = grad[i];
-    if (weight_decay != static_cast<T>(0)) {
+    if (weight_decay > static_cast<T>(0)) {
       grad_new += param[i] * weight_decay;
     }
-    if (momentum[0] != static_cast<T>(0)) {
-      if (stat[i] == static_cast<T>(0)) {
+    if (momentum[0] > static_cast<T>(0)) {
+      if (stat[i] > static_cast<T>(0)) {
         accum[i] = grad_new;
         stat[i] = 0;
       } else {
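The relational comparisons above guard the standard momentum-SGD step; stat[i] acts as a first-update flag that seeds the momentum buffer. A hedged host-side sketch of one parameter's update, following the conventional formulation (the else branch continues past the hunk shown, so the accumulation line below is an assumption):

```cpp
// Host-side sketch of one momentum-SGD step for a single parameter, mirroring the branches above.
// `first_step` plays the role of stat[i] > 0; all names here are stand-ins, not the kernel's.
void SgdStep(float &param, float &accum, bool &first_step, float grad, float lr,
             float momentum, float dampening, float weight_decay, bool nesterov) {
  if (weight_decay > 0.0f) {
    grad += param * weight_decay;  // fold L2 weight decay into the gradient
  }
  if (momentum > 0.0f) {
    if (first_step) {
      accum = grad;                // first update seeds the momentum buffer
      first_step = false;
    } else {
      accum = accum * momentum + (1.0f - dampening) * grad;  // assumed continuation of the hunk
    }
    grad = nesterov ? grad + momentum * accum : accum;
  }
  param -= lr * grad;              // apply the (possibly momentum-adjusted) step
}
```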
@@ -101,6 +101,8 @@ class BoundingBoxEncode(PrimitiveWithInfer):
     def infer_shape(self, anchor_box, groundtruth_box):
         validator.check('anchor_box shape[0]', anchor_box[0], 'groundtruth_box shape[0]', groundtruth_box[0], Rel.EQ,
                         self.name)
+        validator.check("anchor_box rank", len(anchor_box), "", 2, Rel.EQ, self.name)
+        validator.check("groundtruth_box rank", len(groundtruth_box), "", 2, Rel.EQ, self.name)
         validator.check_integer('anchor_box shape[1]', anchor_box[1], 4, Rel.EQ, self.name)
         validator.check_integer('groundtruth_box shape[1]', groundtruth_box[1], 4, Rel.EQ, self.name)
         return anchor_box
@@ -152,6 +154,8 @@ class BoundingBoxDecode(PrimitiveWithInfer):
     def infer_shape(self, anchor_box, deltas):
         validator.check('anchor_box shape[0]', anchor_box[0], 'deltas shape[0]', deltas[0], Rel.EQ, self.name)
+        validator.check("anchor_box rank", len(anchor_box), "", 2, Rel.EQ, self.name)
+        validator.check("deltas rank", len(deltas), "", 2, Rel.EQ, self.name)
         validator.check_integer('anchor_box shape[1]', anchor_box[1], 4, Rel.EQ, self.name)
         validator.check_integer('deltas shape[1]', deltas[1], 4, Rel.EQ, self.name)
         return anchor_box