From: @TFbunny
Reviewed-by: @wilfchen, @liangchenghui
Signed-off-by: @liangchenghui
pull/15605/MERGE
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -72,6 +72,7 @@ class BatchNormFold2GradGpuKernel : public GpuKernel {
                               cudaMemcpyAsync(current_step_host, global_step, sizeof(int32_t), cudaMemcpyDeviceToHost,
                                               reinterpret_cast<cudaStream_t>(stream_ptr)),
                               "Failed to copy gpu memory.");
+    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaDeviceSynchronize(), "cudaDeviceSyncFailed");
     CHECK_CUDA_RET_WITH_ERROR(
       kernel_node_,
       cudaMemcpyAsync(d_x, dout, x_size, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
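Each hunk in this change inserts a cudaDeviceSynchronize() right after a cudaMemcpyAsync that copies a value from device memory to a host buffer, because the async copy is only enqueued on the stream: without an explicit synchronization the host code may read the destination buffer before the copy has actually landed. The standalone sketch below illustrates the race the added call closes; it is a minimal sketch using plain CUDA runtime calls rather than the MindSpore CHECK_CUDA_RET_* macros, and the BumpStep kernel and variable names are made up for illustration.

// Minimal standalone sketch of the pattern the hunks fix (plain CUDA runtime,
// not the MindSpore macros; BumpStep is a hypothetical kernel).
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

__global__ void BumpStep(int32_t *step) { ++(*step); }

int main() {
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  int32_t *step_device = nullptr;
  cudaMalloc(&step_device, sizeof(int32_t));
  cudaMemsetAsync(step_device, 0, sizeof(int32_t), stream);
  BumpStep<<<1, 1, 0, stream>>>(step_device);

  // Pinned host memory keeps the copy below fully asynchronous with respect
  // to the host thread, which is what makes the race visible.
  int32_t *step_host = nullptr;
  cudaMallocHost(&step_host, sizeof(int32_t));

  // The copy is only enqueued on the stream here; nothing guarantees that
  // *step_host already holds the device value when this call returns.
  cudaMemcpyAsync(step_host, step_device, sizeof(int32_t), cudaMemcpyDeviceToHost, stream);

  // Without this synchronization the read below races with the copy; this is
  // the role of the added CHECK_CUDA_RET_WITH_EXCEPT(..., cudaDeviceSynchronize(), ...).
  cudaDeviceSynchronize();

  printf("current step = %d\n", *step_host);

  cudaFreeHost(step_host);
  cudaFree(step_device);
  cudaStreamDestroy(stream);
  return 0;
}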
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -62,6 +62,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
                               cudaMemcpyAsync(current_step_host, current_step, sizeof(int), cudaMemcpyDeviceToHost,
                                               reinterpret_cast<cudaStream_t>(stream_ptr)),
                               "Copy gpu memoy failed.");
+    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaDeviceSynchronize(), "cudaDeviceSyncFailed");
     if (x == nullptr) {
       MS_LOG(ERROR) << "BatchNormFoldGpuKernel x is null.";
       return false;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -60,6 +60,7 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
                               cudaMemcpyAsync(current_step_host, current_step, sizeof(int), cudaMemcpyDeviceToHost,
                                               reinterpret_cast<cudaStream_t>(stream_ptr)),
                               "Copy gpu memoy failed.");
+    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaDeviceSynchronize(), "cudaDeviceSyncFailed");
     if (d_batch_mean == nullptr) {
       MS_LOG(ERROR) << "BatchNormFoldGradGpuKernel d_batch_mean is null.";
       return false;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -52,6 +52,7 @@ class UniformCandidateSamplerGpuKernel : public GpuKernel {
                               cudaMemcpyAsync(&array_input_[0], input, input_size_ * sizeof(T),
                                               cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr)),
                               "cudaMemcpyAsync sampled_candidates failed");
+    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaDeviceSynchronize(), "cudaDeviceSyncFailed");
     for (const auto item : array_input_) {
       set_input_.insert(item);
     }
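The UniformCandidateSamplerGpuKernel hunk is the same fix applied to an array copy: the host loop that fills set_input_ must not run until the device-to-host copy of array_input_ has completed. A hedged sketch of that pattern, assuming only the CUDA runtime and the C++ standard library (the UniqueFromDevice helper and its names are hypothetical, chosen to mirror the kernel's members):

// Hedged sketch of synchronizing before the host consumes a copied-back array.
#include <cstddef>
#include <set>
#include <vector>
#include <cuda_runtime.h>

template <typename T>
std::set<T> UniqueFromDevice(const T *device_input, size_t count, cudaStream_t stream) {
  std::vector<T> array_input(count);
  // Enqueue the device-to-host copy on the kernel's stream.
  cudaMemcpyAsync(array_input.data(), device_input, count * sizeof(T),
                  cudaMemcpyDeviceToHost, stream);
  // Synchronizing only the owning stream is a narrower alternative to
  // cudaDeviceSynchronize(); either way, the host must not touch array_input
  // before the copy has completed.
  cudaStreamSynchronize(stream);

  std::set<T> set_input;
  for (const auto &item : array_input) {
    set_input.insert(item);
  }
  return set_input;
}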