| @@ -39,7 +39,7 @@ void SearchSortedCPUKernel<S, T>::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename S, typename T> | |||
| const S *SearchSortedCPUKernel<S, T>::CustomizedLowerBound(const S *seq_start, const S *seq_end, const S key) { | |||
| while (seq_start < seq_end) { | |||
| const S *mid = seq_start + ((seq_end - seq_start) >> 1); | |||
| const S *mid = seq_start + ((seq_end - seq_start) / 2); | |||
| if (!(key <= *mid)) { | |||
| seq_start = mid + 1; | |||
| } else { | |||
| @@ -61,11 +61,12 @@ bool SearchSortedCPUKernel<S, T>::Launch(const std::vector<kernel::AddressPtr> & | |||
| size_t seq_dim = sequence_shape_.size(); | |||
| size_t search_repeat = values_shape_.back(); | |||
| auto task = [&](size_t start, size_t end) { | |||
| auto task = [this, &sequence, &values, &output, seq_dim, search_repeat](size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| auto seq_start = (seq_dim == 1) ? sequence : sequence + (i / search_repeat) * search_len; | |||
| output[i] = right_ ? std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start | |||
| : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start; | |||
| auto result = right_ ? std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start | |||
| : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start; | |||
| output[i] = static_cast<T>(result); | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, elem_num); | |||
| @@ -92,8 +93,8 @@ void SearchSortedCPUKernel<S, T>::CheckParam(const std::vector<AddressPtr> &inpu | |||
| } | |||
| auto sequence = reinterpret_cast<S *>(inputs[0]->addr); | |||
| size_t list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies<int>()); | |||
| auto task = [&](size_t start, size_t end) { | |||
| int list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies<int>()); | |||
| auto task = [this, &sequence](size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| for (size_t j = 0; j < search_len - 1; j++) { | |||
| if (sequence[i * search_len + j] > sequence[i * search_len + j + 1]) { | |||
| @@ -104,6 +105,5 @@ void SearchSortedCPUKernel<S, T>::CheckParam(const std::vector<AddressPtr> &inpu | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, list_count); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -42,7 +42,7 @@ class SearchSortedCPUKernel : public CPUKernel { | |||
| std::vector<size_t> sequence_shape_; | |||
| std::vector<size_t> values_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| size_t search_len; | |||
| size_t search_len{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T_S( | |||
| @@ -104,8 +104,6 @@ MS_REG_CPU_KERNEL_T_S( | |||
| SearchSorted, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt64), | |||
| SearchSortedCPUKernel, int8_t, int64_t); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SEARCHSORTED_CPU_KERNEL_H_ | |||
| @@ -35,12 +35,12 @@ void SGDCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SGDCPUKernel<T>::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| // inputs: params, grad, lr, accum, momentum, stat | |||
| // inputs: param, grad, lr, accum, momentum, stat | |||
| if (inputs.size() != kInputSize) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but SGD needs 6 inputs."; | |||
| } | |||
| // output: param | |||
| // output: output_param | |||
| if (outputs.size() != kOutputSize) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SGD needs 1 outputs."; | |||
| } | |||
| @@ -60,18 +60,20 @@ bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::v | |||
| auto output_param = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(T); | |||
| auto task = [&](size_t start, size_t end) { | |||
| auto task = [this, &param, &grad, &lr, &accum, &momentum, &stat, &output_param](size_t start, size_t end) { | |||
| T ZERO = static_cast<T>(0); | |||
| T ONE = static_cast<T>(1); | |||
| for (size_t i = start; i < end; i++) { | |||
| T grad_new = grad[i]; | |||
| if (weight_decay_ > 0) { | |||
| if (weight_decay_ > static_cast<float>(0.0)) { | |||
| grad_new += param[i] * static_cast<T>(weight_decay_); | |||
| } | |||
| if (momentum[0] > static_cast<T>(0)) { | |||
| if (stat[i] > static_cast<T>(0)) { | |||
| if (momentum[0] > ZERO) { | |||
| if (stat[i] > ZERO) { | |||
| accum[i] = grad_new; | |||
| stat[i] = static_cast<T>(0); | |||
| stat[i] = ZERO; | |||
| } else { | |||
| accum[i] = accum[i] * momentum[0] + static_cast<T>(1.0 - dampening_) * grad_new; | |||
| accum[i] = accum[i] * momentum[0] + (ONE - static_cast<T>(dampening_)) * grad_new; | |||
| } | |||
| if (nesterov_) { | |||
| grad_new += accum[i] * momentum[0]; | |||
| @@ -36,8 +36,8 @@ class SGDCPUKernel : public CPUKernel { | |||
| private: | |||
| static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| float dampening_; | |||
| float weight_decay_; | |||
| float dampening_{0.0}; | |||
| float weight_decay_{0.0}; | |||
| bool nesterov_{true}; | |||
| }; | |||