|
|
|
@@ -26,6 +26,7 @@ void StandardNormal(float *output, std::normal_distribution<float> distribution, |
|
|
|
output[i] = distribution(random_generator); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
void LaunchStandardNormal(int seed, int seed2, const std::vector<AddressPtr> &outputs) { |
|
|
|
unsigned int RNG_seed; |
|
|
|
std::random_device rd; |
|
|
|
@@ -38,33 +39,13 @@ void LaunchStandardNormal(int seed, int seed2, const std::vector<AddressPtr> &ou |
|
|
|
} |
|
|
|
|
|
|
|
auto output = reinterpret_cast<float *>(outputs[0]->addr); |
|
|
|
// multithreading |
|
|
|
size_t lens = outputs[0]->size / sizeof(float); |
|
|
|
auto max_thread_num = std::thread::hardware_concurrency(); |
|
|
|
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; |
|
|
|
if (thread_num < 1) { |
|
|
|
MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num; |
|
|
|
return; |
|
|
|
} |
|
|
|
std::vector<std::thread> threads; |
|
|
|
threads.reserve(thread_num); |
|
|
|
size_t start = 0; |
|
|
|
size_t once_compute_size = (lens + thread_num - 1) / thread_num; |
|
|
|
if (once_compute_size < 1) { |
|
|
|
MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size; |
|
|
|
return; |
|
|
|
} |
|
|
|
std::normal_distribution<float> distribution; |
|
|
|
while (start < lens) { |
|
|
|
// avoid different threads using the same seed to generate the same random number |
|
|
|
auto task = [&](size_t start, size_t end) { |
|
|
|
std::default_random_engine random_generator(++RNG_seed); |
|
|
|
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); |
|
|
|
threads.emplace_back(std::thread(StandardNormal, output, distribution, random_generator, start, end)); |
|
|
|
start += once_compute_size; |
|
|
|
} |
|
|
|
for (size_t i = 0; i < threads.size(); ++i) { |
|
|
|
threads[i].join(); |
|
|
|
} |
|
|
|
StandardNormal(output, distribution, random_generator, start, end); |
|
|
|
}; |
|
|
|
CPUKernelUtils::ParallelFor(task, lens); |
|
|
|
} |
|
|
|
|
|
|
|
void RandomCPUKernel::InitKernel(const CNodePtr &kernel_node) { |
|
|
|
@@ -91,8 +72,7 @@ void RandomCPUKernel::InitKernel(const CNodePtr &kernel_node) { |
|
|
|
seed2_ = LongToInt(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2"))); |
|
|
|
} |
|
|
|
|
|
|
|
bool RandomCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, |
|
|
|
const std::vector<kernel::AddressPtr> & /*workspace*/, |
|
|
|
bool RandomCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, |
|
|
|
const std::vector<kernel::AddressPtr> &outputs) { |
|
|
|
switch (random_op_type_) { |
|
|
|
case RANDOM_OP_NORMAL: { |
|
|
|
|