Browse Source

fix(dnn): fix the sync problem in some kernels

GitOrigin-RevId: df3f7dc51b
tags/v1.9.0
Megvii Engine Team 3 years ago
parent
commit
6dc0c0b9cc
2 changed files with 3 additions and 2 deletions
  1. +2
    -1
      dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu
  2. +1
    -1
      dnn/src/cuda/padding/padding.cu

+ 2
- 1
dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu View File

@@ -355,7 +355,8 @@ void convolution3d::exec_inplace_matmul_bwd_filter(
} else {
BX = BY = 16;
}
- cudaMemset(grad, 0, OC * IC * FD * FH * FW * sizeof(float));
+ cuda_check(
+ cudaMemsetAsync(grad, 0, OC * IC * FD * FH * FW * sizeof(float), stream));
dim3 blocks(DIVUP(n, 4 * BX), DIVUP(m, 4 * BY), N);
dim3 threads(BX, BY);
#define DISPATCH_BX_BY(BX, BY) \


+ 1
- 1
dnn/src/cuda/padding/padding.cu View File

@@ -224,7 +224,7 @@ void padding_backward_proxy(
params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
}

- cudaMemset(dst.raw_ptr(), 0, dst.layout.access_bytes());
+ cuda_check(cudaMemsetAsync(dst.raw_ptr(), 0, dst.layout.access_bytes(), stream));

void (*bwd_kern)(const size_t, const size_t, const T* const, T* const, ShapeParams);



Loading…
Cancel
Save