|
|
|
@@ -76,15 +76,16 @@ void ArithmeticCPUKernel::RealDiv(const T *input1, const T *input2, T *out, size |
|
|
|
GenIndex(i, &idx); |
|
|
|
auto dividend = input1[idx[0]]; |
|
|
|
auto divisor = input2[idx[1]]; |
|
|
|
if (divisor == 0) { |
|
|
|
if (dividend == 0) { |
|
|
|
auto zero = (T)0; |
|
|
|
if (divisor == zero) { |
|
|
|
if (dividend == zero) { |
|
|
|
out[i] = std::numeric_limits<T>::quiet_NaN(); |
|
|
|
continue; |
|
|
|
} |
|
|
|
if (std::numeric_limits<T>::has_infinity) { |
|
|
|
out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); |
|
|
|
out[i] = dividend > zero ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); |
|
|
|
} else { |
|
|
|
out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); |
|
|
|
out[i] = dividend > zero ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); |
|
|
|
} |
|
|
|
continue; |
|
|
|
} |
|
|
|
@@ -102,15 +103,16 @@ void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out, size_t s |
|
|
|
GenIndex(i, &idx); |
|
|
|
auto dividend = input1[idx[0]]; |
|
|
|
auto divisor = input2[idx[1]]; |
|
|
|
if (divisor == 0) { |
|
|
|
if (dividend == 0) { |
|
|
|
auto zero = (T)0; |
|
|
|
if (divisor == zero) { |
|
|
|
if (dividend == zero) { |
|
|
|
out[i] = std::numeric_limits<T>::quiet_NaN(); |
|
|
|
continue; |
|
|
|
} |
|
|
|
if (std::numeric_limits<T>::has_infinity) { |
|
|
|
out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); |
|
|
|
out[i] = dividend > zero ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); |
|
|
|
} else { |
|
|
|
out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); |
|
|
|
out[i] = dividend > zero ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); |
|
|
|
} |
|
|
|
continue; |
|
|
|
} |
|
|
|
@@ -128,19 +130,20 @@ void ArithmeticCPUKernel::FloorDiv(const T *input1, const T *input2, T *out, siz |
|
|
|
GenIndex(i, &idx); |
|
|
|
auto dividend = input1[idx[0]]; |
|
|
|
auto divisor = input2[idx[1]]; |
|
|
|
if (divisor == 0) { |
|
|
|
if (dividend == 0) { |
|
|
|
auto zero = (T)0; |
|
|
|
if (divisor == zero) { |
|
|
|
if (dividend == zero) { |
|
|
|
out[i] = std::numeric_limits<T>::quiet_NaN(); |
|
|
|
continue; |
|
|
|
} |
|
|
|
if (std::numeric_limits<T>::has_infinity) { |
|
|
|
out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); |
|
|
|
out[i] = dividend > zero ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); |
|
|
|
} else { |
|
|
|
out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); |
|
|
|
out[i] = dividend > zero ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); |
|
|
|
} |
|
|
|
continue; |
|
|
|
} |
|
|
|
out[i] = floor(dividend / divisor); |
|
|
|
out[i] = (T)floor(static_cast<double>(dividend) / static_cast<double>(divisor)); |
|
|
|
} |
|
|
|
}; |
|
|
|
CPUKernelUtils::ParallelFor(task, size); |
|
|
|
@@ -295,7 +298,7 @@ void ArithmeticCPUKernel::Atan2(const T *input1, const T *input2, T *out, size_t |
|
|
|
for (size_t i = start; i < end; i++) { |
|
|
|
std::vector<size_t> idx; |
|
|
|
GenIndex(i, &idx); |
|
|
|
out[i] = atan2(input1[idx[0]], input2[idx[1]]); |
|
|
|
out[i] = (T)atan2(static_cast<double>(input1[idx[0]]), static_cast<double>(input2[idx[1]])); |
|
|
|
} |
|
|
|
}; |
|
|
|
CPUKernelUtils::ParallelFor(task, size); |
|
|
|
@@ -348,8 +351,8 @@ void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) { |
|
|
|
CPUKernelUtils::GetElementNumEveryDim(input_shape0_, &input_element_num0_); |
|
|
|
CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_); |
|
|
|
CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); |
|
|
|
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); |
|
|
|
if (dtype_ != AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1)) { |
|
|
|
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); |
|
|
|
if (dtype_ != AnfAlgo::GetInputDeviceDataType(kernel_node, 1)) { |
|
|
|
MS_LOG(EXCEPTION) << "Input0 and input1 must has the same data type"; |
|
|
|
} |
|
|
|
target_dtype_ = AnfAlgo::GetOutputInferDataType(kernel_node, 0); |
|
|
|
@@ -358,14 +361,26 @@ void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) { |
|
|
|
bool ArithmeticCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, |
|
|
|
const std::vector<kernel::AddressPtr> & /*workspace*/, |
|
|
|
const std::vector<kernel::AddressPtr> &outputs) { |
|
|
|
if (dtype_ == kNumberTypeInt32 || dtype_ == kNumberTypeInt16 || dtype_ == kNumberTypeInt8) { |
|
|
|
if (dtype_ == kNumberTypeInt32) { |
|
|
|
LaunchKernel<int>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16 || dtype_ == kNumberTypeFloat64) { |
|
|
|
} else if (dtype_ == kNumberTypeFloat32) { |
|
|
|
LaunchKernel<float>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeInt64) { |
|
|
|
LaunchKernel<int64_t>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeBool) { |
|
|
|
LaunchKernelLogic<bool>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeInt8) { |
|
|
|
LaunchKernel<int8_t>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeInt16) { |
|
|
|
LaunchKernel<int16_t>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeFloat16) { |
|
|
|
LaunchKernel<float16>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeFloat64) { |
|
|
|
LaunchKernel<double>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeUInt8) { |
|
|
|
LaunchKernel<uint8_t>(inputs, outputs); |
|
|
|
} else if (dtype_ == kNumberTypeUInt32) { |
|
|
|
LaunchKernel<uint32_t>(inputs, outputs); |
|
|
|
} else { |
|
|
|
MS_LOG(EXCEPTION) << "Data type " << TypeIdLabel(dtype_) << "is not support."; |
|
|
|
} |
|
|
|
|