| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -18,6 +18,10 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| MS_REG_GPU_KERNEL_TWO( | |||||
| ArgMaxWithValue, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64), | |||||
| ArgmaxWithValueGpuKernel, double, int) | |||||
| MS_REG_GPU_KERNEL_TWO( | MS_REG_GPU_KERNEL_TWO( | ||||
| ArgMaxWithValue, | ArgMaxWithValue, | ||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -18,6 +18,8 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), | |||||
| BroadcastToGpuKernel, double) | |||||
| MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | ||||
| BroadcastToGpuKernel, float) | BroadcastToGpuKernel, float) | ||||
| MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | MS_REG_GPU_KERNEL_ONE(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | ||||
| @@ -34,6 +34,14 @@ MS_REG_GPU_KERNEL_TWO(TensorScatterUpdate, | |||||
| .AddOutputAttr(kNumberTypeFloat32), | .AddOutputAttr(kNumberTypeFloat32), | ||||
| TensorScatterUpdateGpuFwdKernel, float, int) | TensorScatterUpdateGpuFwdKernel, float, int) | ||||
| MS_REG_GPU_KERNEL_TWO(TensorScatterUpdate, | |||||
| KernelAttr() | |||||
| .AddInputAttr(kNumberTypeFloat64) | |||||
| .AddInputAttr(kNumberTypeInt32) | |||||
| .AddInputAttr(kNumberTypeFloat64) | |||||
| .AddOutputAttr(kNumberTypeFloat64), | |||||
| TensorScatterUpdateGpuFwdKernel, double, int) | |||||
| MS_REG_GPU_KERNEL_TWO(TensorScatterUpdate, | MS_REG_GPU_KERNEL_TWO(TensorScatterUpdate, | ||||
| KernelAttr() | KernelAttr() | ||||
| .AddInputAttr(kNumberTypeInt8) | .AddInputAttr(kNumberTypeInt8) | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -113,6 +113,9 @@ void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, | |||||
| NoBroadcastGradKernel<<<GET_BLOCKS(nums), GET_THREADS, 0, stream>>>(nums, grad_x1, grad_x2, op, x1, x2, dy, dx1, dx2); | NoBroadcastGradKernel<<<GET_BLOCKS(nums), GET_THREADS, 0, stream>>>(nums, grad_x1, grad_x2, op, x1, x2, dy, dx1, dx2); | ||||
| } | } | ||||
| template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, | |||||
| const double *x1, const double *x2, const double *dy, double *dx1, double *dx2, | |||||
| cudaStream_t stream); | |||||
| template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, | template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, | ||||
| const float *x1, const float *x2, const float *dy, float *dx1, float *dx2, | const float *x1, const float *x2, const float *dy, float *dx1, float *dx2, | ||||
| cudaStream_t stream); | cudaStream_t stream); | ||||
| @@ -124,6 +127,10 @@ template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool & | |||||
| template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, | template void NoBroadcastGrad(const int &nums, const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, | ||||
| const int64_t *x1, const int64_t *x2, const int64_t *dy, int64_t *dx1, int64_t *dx2, | const int64_t *x1, const int64_t *x2, const int64_t *dy, int64_t *dx1, int64_t *dx2, | ||||
| cudaStream_t stream); | cudaStream_t stream); | ||||
| template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1, | |||||
| const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, | |||||
| const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, const double *x1, | |||||
| const double *x2, const double *dy, double *dx1, double *dx2, cudaStream_t stream); | |||||
| template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1, | template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1, | ||||
| const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, | const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, | ||||
| const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, const float *x1, | const bool &grad_x1, const bool &grad_x2, enum BroadcastGradOpType op, const float *x1, | ||||
| @@ -555,6 +555,9 @@ void BroadcastTo(const size_t &i0, const size_t &i1, const size_t &i2, const siz | |||||
| output_addr); | output_addr); | ||||
| } | } | ||||
| template void BroadcastTo(const size_t &i0, const size_t &i1, const size_t &i2, const size_t &i3, const size_t &o0, | |||||
| const size_t &o1, const size_t &o2, const size_t &o3, const double *input_addr, | |||||
| double *output_addr, cudaStream_t stream); | |||||
| template void BroadcastTo(const size_t &i0, const size_t &i1, const size_t &i2, const size_t &i3, const size_t &o0, | template void BroadcastTo(const size_t &i0, const size_t &i1, const size_t &i2, const size_t &i3, const size_t &o0, | ||||
| const size_t &o1, const size_t &o2, const size_t &o3, const float *input_addr, | const size_t &o1, const size_t &o2, const size_t &o3, const float *input_addr, | ||||
| float *output_addr, cudaStream_t stream); | float *output_addr, cudaStream_t stream); | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -327,6 +327,8 @@ void CalGeneralReduction(bool small, const T *input, const size_t bound, const s | |||||
| return; | return; | ||||
| } | } | ||||
| template void CalGeneralReduction(bool small, const double *input, const size_t bound_, const size_t outerSize_, | |||||
| const size_t innerSize_, int *index, double *output, cudaStream_t cuda_stream); | |||||
| template void CalGeneralReduction(bool small, const float *input, const size_t bound_, const size_t outerSize_, | template void CalGeneralReduction(bool small, const float *input, const size_t bound_, const size_t outerSize_, | ||||
| const size_t innerSize_, int *index, float *output, cudaStream_t cuda_stream); | const size_t innerSize_, int *index, float *output, cudaStream_t cuda_stream); | ||||
| template void CalGeneralReduction(bool small, const half *input, const size_t bound_, const size_t outerSize_, | template void CalGeneralReduction(bool small, const half *input, const size_t bound_, const size_t outerSize_, | ||||
| @@ -67,6 +67,11 @@ template void TensorScatterUpdate<float, int>(float *input, int *indices, float | |||||
| const size_t &output_size, const size_t &indices_dim_0, | const size_t &output_size, const size_t &indices_dim_0, | ||||
| const size_t &indices_dim_1, int *indices_stride, int *work_shape, | const size_t &indices_dim_1, int *indices_stride, int *work_shape, | ||||
| cudaStream_t stream); | cudaStream_t stream); | ||||
| template void TensorScatterUpdate<double, int>(double *input, int *indices, double *update, double *output, | |||||
| const size_t &block_size, const size_t &input_size, | |||||
| const size_t &output_size, const size_t &indices_dim_0, | |||||
| const size_t &indices_dim_1, int *indices_stride, int *work_shape, | |||||
| cudaStream_t stream); | |||||
| template void TensorScatterUpdate<char, int>(char *input, int *indices, char *update, char *output, | template void TensorScatterUpdate<char, int>(char *input, int *indices, char *update, char *output, | ||||
| const size_t &block_size, const size_t &input_size, | const size_t &block_size, const size_t &input_size, | ||||
| const size_t &output_size, const size_t &indices_dim_0, | const size_t &output_size, const size_t &indices_dim_0, | ||||
| @@ -23,6 +23,10 @@ MS_REG_GPU_KERNEL_ONE( | |||||
| Greater, | Greater, | ||||
| KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool), | KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool), | ||||
| BroadcastOpGpuKernel, double) | BroadcastOpGpuKernel, double) | ||||
| MS_REG_GPU_KERNEL_ONE( | |||||
| Minimum, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), | |||||
| BroadcastOpGpuKernel, double) | |||||
| MS_REG_GPU_KERNEL_ONE( | MS_REG_GPU_KERNEL_ONE( | ||||
| Less, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool), | Less, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool), | ||||
| BroadcastOpGpuKernel, double) | BroadcastOpGpuKernel, double) | ||||
| @@ -46,6 +50,9 @@ MS_REG_GPU_KERNEL_ONE( | |||||
| RealDiv, | RealDiv, | ||||
| KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), | KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), | ||||
| BroadcastOpGpuKernel, double) | BroadcastOpGpuKernel, double) | ||||
| MS_REG_GPU_KERNEL_ONE( | |||||
| Pow, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), | |||||
| BroadcastOpGpuKernel, double) | |||||
| // fp32 | // fp32 | ||||
| MS_REG_GPU_KERNEL_ONE( | MS_REG_GPU_KERNEL_ONE( | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -18,6 +18,14 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| MS_REG_GPU_KERNEL_ONE(MinimumGrad, | |||||
| KernelAttr() | |||||
| .AddInputAttr(kNumberTypeFloat64) | |||||
| .AddInputAttr(kNumberTypeFloat64) | |||||
| .AddInputAttr(kNumberTypeFloat64) | |||||
| .AddOutputAttr(kNumberTypeFloat64) | |||||
| .AddOutputAttr(kNumberTypeFloat64), | |||||
| BroadcastOpGradGpuKernel, double) | |||||
| MS_REG_GPU_KERNEL_ONE(MinimumGrad, | MS_REG_GPU_KERNEL_ONE(MinimumGrad, | ||||
| KernelAttr() | KernelAttr() | ||||
| .AddInputAttr(kNumberTypeFloat32) | .AddInputAttr(kNumberTypeFloat32) | ||||