Merge pull request !4130 from hanhuifeng/gpu_dropout (tags/v0.7.0-beta)
| @@ -54,12 +54,18 @@ class DropoutGpuFwdKernel : public GpuKernel { | |||||
| float *mask_f = GetDeviceAddress<float>(workspace, 0); | float *mask_f = GetDeviceAddress<float>(workspace, 0); | ||||
| if (!states_init_) { | if (!states_init_) { | ||||
| curandCreateGenerator(&mask_generator_, CURAND_RNG_PSEUDO_DEFAULT); | |||||
| curandSetPseudoRandomGeneratorSeed(mask_generator_, time(NULL)); | |||||
| CHECK_CURAND_RET_WITH_EXCEPT(curandCreateGenerator(&mask_generator_, CURAND_RNG_PSEUDO_DEFAULT), | |||||
| "Failed to create generator"); | |||||
| CHECK_CURAND_RET_WITH_EXCEPT(curandSetPseudoRandomGeneratorSeed(mask_generator_, time(NULL)), | |||||
| "Failed to SetPseudoRandomGeneratorSeed"); | |||||
| MS_EXCEPTION_IF_NULL(mask_generator_); | |||||
| states_init_ = true; | states_init_ = true; | ||||
| } | } | ||||
| CHECK_CURAND_RET_WITH_EXCEPT(curandSetStream(mask_generator_, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||||
| "Failed to set stream for generator"); | |||||
| // curandGen only support float or double for mask. | // curandGen only support float or double for mask. | ||||
| curandGenerateUniform(mask_generator_, mask_f, num_count_); | |||||
| CHECK_CURAND_RET_WITH_EXCEPT(curandGenerateUniform(mask_generator_, mask_f, num_count_), | |||||
| "Failed to generate uniform"); | |||||
| DropoutForward(input, mask, output, mask_f, num_count_, keep_prob_, reinterpret_cast<cudaStream_t>(stream_ptr)); | DropoutForward(input, mask, output, mask_f, num_count_, keep_prob_, reinterpret_cast<cudaStream_t>(stream_ptr)); | ||||
| return true; | return true; | ||||
| @@ -20,7 +20,9 @@ | |||||
| #include <iostream> | #include <iostream> | ||||
| #include <vector> | #include <vector> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <map> | |||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "include/curand.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| @@ -131,6 +133,15 @@ inline bool CheckNullInput(std::vector<size_t> input_shape) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| #define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape) | #define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape) | ||||
// Evaluates a cuRAND call exactly once and, when the returned curandStatus_t is not
// CURAND_STATUS_SUCCESS, reports it through MS_LOG(EXCEPTION) together with `message`
// and the numeric status code.
//   expression: any expression yielding a curandStatus_t (typically a curand* API call).
//   message:    anything streamable into MS_LOG, describing the failed operation.
// The temporary uses a deliberately unusual name so that an `expression` which itself
// mentions a variable called `status` is not captured by the macro's own local.
// NOTE(review): MS_LOG(EXCEPTION) is presumed to throw, as the macro name suggests —
// confirm against utils/log_adapter.h.
#define CHECK_CURAND_RET_WITH_EXCEPT(expression, message)                        \
  {                                                                              \
    curandStatus_t curand_ret_status_ = (expression);                            \
    if (curand_ret_status_ != CURAND_STATUS_SUCCESS) {                           \
      MS_LOG(EXCEPTION) << "CUDA curand Error: " << message                      \
                        << " | curandStatus: " << curand_ret_status_;            \
    }                                                                            \
  }
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace device | } // namespace device | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -0,0 +1,54 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| import numpy as np | |||||
| import pytest | |||||
| import mindspore.nn as nn | |||||
| from mindspore import Tensor | |||||
| from mindspore.ops import operations as P | |||||
class Net(nn.Cell):
    """Minimal cell that wraps the ``P.Dropout`` primitive so it can be called like a network.

    Args:
        keep_prob: value forwarded unchanged to ``P.Dropout``.
    """

    def __init__(self, keep_prob):
        super(Net, self).__init__()
        self.drop = P.Dropout(keep_prob)

    def construct(self, x_):
        """Apply the wrapped dropout primitive to ``x_`` and return its result as-is."""
        return self.drop(x_)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dropout():
    """Statistical smoke test for the Dropout operator on an all-ones float32 input.

    The operator is random, so every check is a tolerance band rather than an
    exact comparison.
    """
    x_shape = [32, 16, 2, 5]
    x = np.ones(x_shape).astype(np.float32)
    keep_prob = 0.4
    dropout = Net(keep_prob)
    tx = Tensor(x)
    output, mask = dropout(tx)

    # check output
    output_np = output.asnumpy()
    elem_count = x.size
    nonzero_count = np.count_nonzero(output_np)
    # The fraction of surviving elements must lie within +/-0.1 of keep_prob.
    assert (elem_count * (keep_prob - 0.1)) < nonzero_count < (elem_count * (keep_prob + 0.1))
    output_sum = np.sum(output_np)
    x_sum = np.sum(x)
    # The total of the output must stay within 10% of the total of the input
    # (presumably because kept elements are rescaled by 1 / keep_prob — confirm
    # against the kernel implementation).
    assert abs(output_sum - x_sum)/x_sum < 0.1

    # check mask
    mask_np = mask.asnumpy()
    mask_sum = np.sum(mask_np)
    # The mask must be nonzero in exactly as many positions as the output.
    assert np.count_nonzero(mask_np) == nonzero_count
    # The mask total must be within 10% of the number of kept elements.
    assert abs(mask_sum - nonzero_count)/nonzero_count < 0.1