From: @zhao_ting_v Reviewed-by: @kisnwang,@liangchenghui Signed-off-by: @liangchenghui tags/v1.1.0
| @@ -88,11 +88,15 @@ bool AdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | ||||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | ||||
| std::vector<std::thread> threads; | std::vector<std::thread> threads; | ||||
| if (thread_num < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num; | |||||
| return false; | |||||
| } | |||||
| threads.reserve(thread_num); | threads.reserve(thread_num); | ||||
| size_t start = 0; | size_t start = 0; | ||||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | size_t once_compute_size = (lens + thread_num - 1) / thread_num; | ||||
| if (thread_num < 1 || once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num << "; once_compute_size " << once_compute_size; | |||||
| if (once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size; | |||||
| return false; | return false; | ||||
| } | } | ||||
| while (start < lens) { | while (start < lens) { | ||||
| @@ -186,9 +186,17 @@ void ArithmeticCPUKernel::LaunchLess(const std::vector<AddressPtr> &inputs, cons | |||||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | ||||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | ||||
| std::vector<std::thread> threads; | std::vector<std::thread> threads; | ||||
| if (thread_num < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num; | |||||
| return; | |||||
| } | |||||
| threads.reserve(thread_num); | threads.reserve(thread_num); | ||||
| size_t start = 0; | size_t start = 0; | ||||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | size_t once_compute_size = (lens + thread_num - 1) / thread_num; | ||||
| if (once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size; | |||||
| return; | |||||
| } | |||||
| while (start < lens) { | while (start < lens) { | ||||
| size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); | size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); | ||||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::Less<T>, this, input1, input2, output, start, end)); | threads.emplace_back(std::thread(&ArithmeticCPUKernel::Less<T>, this, input1, input2, output, start, end)); | ||||
| @@ -214,11 +222,15 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, co | |||||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | ||||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | ||||
| std::vector<std::thread> threads; | std::vector<std::thread> threads; | ||||
| if (thread_num < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num; | |||||
| return; | |||||
| } | |||||
| threads.reserve(thread_num); | threads.reserve(thread_num); | ||||
| size_t start = 0; | size_t start = 0; | ||||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | size_t once_compute_size = (lens + thread_num - 1) / thread_num; | ||||
| if (thread_num < 1 || once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num << "; once_compute_size " << once_compute_size; | |||||
| if (once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size; | |||||
| return; | return; | ||||
| } | } | ||||
| while (start < lens) { | while (start < lens) { | ||||
| @@ -72,11 +72,15 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs | |||||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | ||||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | ||||
| std::vector<std::thread> threads; | std::vector<std::thread> threads; | ||||
| if (thread_num < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num; | |||||
| return; | |||||
| } | |||||
| threads.reserve(thread_num); | threads.reserve(thread_num); | ||||
| size_t start = 0; | size_t start = 0; | ||||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | size_t once_compute_size = (lens + thread_num - 1) / thread_num; | ||||
| if (thread_num < 1 || once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num << "; once_compute_size " << once_compute_size; | |||||
| if (once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size; | |||||
| return; | return; | ||||
| } | } | ||||
| while (start < lens) { | while (start < lens) { | ||||
| @@ -40,11 +40,15 @@ void LaunchCast(const std::vector<kernel::AddressPtr> &inputs, const std::vector | |||||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | ||||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | ||||
| std::vector<std::thread> threads; | std::vector<std::thread> threads; | ||||
| if (thread_num < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num; | |||||
| return; | |||||
| } | |||||
| threads.reserve(thread_num); | threads.reserve(thread_num); | ||||
| size_t start = 0; | size_t start = 0; | ||||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | size_t once_compute_size = (lens + thread_num - 1) / thread_num; | ||||
| if (thread_num < 1 || once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num << "; once_compute_size " << once_compute_size; | |||||
| if (once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size; | |||||
| return; | return; | ||||
| } | } | ||||
| while (start < lens) { | while (start < lens) { | ||||
| @@ -146,11 +146,15 @@ void EltWiseGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, c | |||||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | ||||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | ||||
| std::vector<std::thread> threads; | std::vector<std::thread> threads; | ||||
| if (thread_num < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num; | |||||
| return; | |||||
| } | |||||
| threads.reserve(thread_num); | threads.reserve(thread_num); | ||||
| size_t start = 0; | size_t start = 0; | ||||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | size_t once_compute_size = (lens + thread_num - 1) / thread_num; | ||||
| if (thread_num < 1 || once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num << "; once_compute_size " << once_compute_size; | |||||
| if (once_compute_size < 1) { | |||||
| MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size; | |||||
| return; | return; | ||||
| } | } | ||||
| while (start < lens) { | while (start < lens) { | ||||
| @@ -83,7 +83,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d | |||||
| size_t diff_index = 0; | size_t diff_index = 0; | ||||
| for (size_t h = 0; h < dst_shape_[2]; ++h) { | for (size_t h = 0; h < dst_shape_[2]; ++h) { | ||||
| box[0].first = IntToSize(std::max(h_start, 0)); | box[0].first = IntToSize(std::max(h_start, 0)); | ||||
| box[0].second = IntToSize(std::min(h_start + SizeToInt(kernel_size_[1]), src_height)); | |||||
| box[0].second = IntToSize(std::min(h_start + SizeToInt(kernel_size_[0]), src_height)); | |||||
| for (size_t w = 0; w < src_shape_[3]; ++w) { | for (size_t w = 0; w < src_shape_[3]; ++w) { | ||||
| row_max_pair[w].first = 0; | row_max_pair[w].first = 0; | ||||
| row_max_pair[w].second = 0; | row_max_pair[w].second = 0; | ||||
| @@ -91,7 +91,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d | |||||
| int w_start = -padding_l_[1]; | int w_start = -padding_l_[1]; | ||||
| for (size_t w = 0; w < dst_shape_[3]; ++w) { | for (size_t w = 0; w < dst_shape_[3]; ++w) { | ||||
| box[1].first = IntToSize(std::max(w_start, 0)); | box[1].first = IntToSize(std::max(w_start, 0)); | ||||
| box[1].second = IntToSize(std::min(w_start + SizeToInt(kernel_size_[0]), src_width)); | |||||
| box[1].second = IntToSize(std::min(w_start + SizeToInt(kernel_size_[1]), src_width)); | |||||
| RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair); | RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair); | ||||
| diff_index += 1; | diff_index += 1; | ||||
| w_start += stride_; | w_start += stride_; | ||||