|
|
|
@@ -18,24 +18,24 @@ |
|
|
|
|
|
|
|
template <typename T, typename S> |
|
|
|
__global__ void SigmoidCrossEntropyWithLogitsGradKernel(const size_t size, const T *logits, const S *labels, |
|
|
|
T *outputs) { |
|
|
|
const T *dout_addr, T *outputs) { |
|
|
|
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { |
|
|
|
if (logits[i] >= 0) { |
|
|
|
outputs[i] = 1. / (1. + exp(-logits[i])) - labels[i]; |
|
|
|
outputs[i] = (1. / (1. + exp(-logits[i])) - labels[i]) * dout_addr[i]; |
|
|
|
} else { |
|
|
|
const T exp_val = exp(logits[i]); |
|
|
|
outputs[i] = exp_val / (1. + exp_val) - labels[i]; |
|
|
|
outputs[i] = (exp_val / (1. + exp_val) - labels[i]) * dout_addr[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
template <typename T, typename S> |
|
|
|
void SigmoidCrossEntropyWithLogitsGrad(const size_t size, const T *logits, const S *labels, T *outputs, |
|
|
|
cudaStream_t cuda_stream) { |
|
|
|
void SigmoidCrossEntropyWithLogitsGrad(const size_t size, const T *logits, const S *labels, const T *dout_addr, |
|
|
|
T *outputs, cudaStream_t cuda_stream) { |
|
|
|
SigmoidCrossEntropyWithLogitsGradKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, logits, labels, |
|
|
|
outputs); |
|
|
|
dout_addr, outputs); |
|
|
|
} |
|
|
|
|
|
|
|
template void SigmoidCrossEntropyWithLogitsGrad<float, float>(const size_t size, const float *logits, |
|
|
|
const float *labels, float *outputs, |
|
|
|
cudaStream_t cuda_stream); |
|
|
|
const float *labels, const float *dout_addr, |
|
|
|
float *outputs, cudaStream_t cuda_stream); |