!31130 fix bug of fake quant perchannel

Merge pull request !31130 from cjh9368/clean_static_error_master
4 years ago · c9cb729cd3
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/fake_quant_perchannel_impl.cu
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/fake_quant_perchannel_impl.cu
@@ -1,138 +1,138 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include <thrust/extrema.h>
 #include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 #include <thrust/reduce.h>
 #include <thrust/pair.h>
 #include "fake_quant_perchannel_impl.cuh"

 /**
 * Find the nudge min, max and scale value as output.
 * @param input_min array
 * @param input_max array
 * @param quant_min 1 << bit -1
 * @param quant_max 0
 * @param nudge_min array
 * @param nudge_max array
 * @param scale array
 * @param channel_num
 * @return
 */
 __global__ void NudgeMinMaxPerChannel(float *input_min, float *input_max, const float quant_min, const float quant_max,
                                      float *nudge_min, float *nudge_max, float *scale, int channel_num,
                                      const bool symmetric) {
  float zp_from_min = 0.f;
  float nudge_zp = 0.f;

  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < channel_num; i += blockDim.x * gridDim.x) {
    if (symmetric) {
      input_max[i] = abs(input_min[0]) < input_max[i] ? input_max[i] : -input_min[i];
      input_min[i] = abs(input_min[i]) < input_max[i] ? -input_max[i] : input_min[i];
    }
    if ((quant_max - quant_min) == 0 || (input_max[i] - input_min[i]) == 0) {
      scale[i] = 0.f;
      zp_from_min = 0.f;
    } else {
      scale[i] = (input_max[i] - input_min[i]) / (quant_max - quant_min);
      zp_from_min = quant_min - input_min[i] / scale[i];
    }

    if (zp_from_min <= quant_min) {
      nudge_zp = quant_min;
    } else if (zp_from_min >= quant_max) {
      nudge_zp = quant_max;
    } else {
      nudge_zp = round(zp_from_min);
    }

    nudge_min[i] = (quant_min - nudge_zp) * (scale[i]);
    nudge_max[i] = (quant_max - nudge_zp) * (scale[i]);
  }
 }

 void CalNudgePerChannel(float *input_min, float *input_max, const float quant_min, const float quant_max,
                        float *nudge_min, float *nudge_max, float *scale, const int channel_num, const bool symmetric,
                        cudaStream_t cuda_stream) {
  NudgeMinMaxPerChannel<<<GET_BLOCKS(channel_num), GET_THREADS, 0, cuda_stream>>>(
    input_min, input_max, quant_min, quant_max, nudge_min, nudge_max, scale, channel_num, symmetric);
 }

 /**
 * Calculate fake quant output according by nudge min, nudge max, nudge scale.
 * @param input - array
 * @param output - array
 * @param total_size - int, purpose for cal the per channel number in filters
 * @param channel_size - int, purpose for cal the per channel number in filters
 * @param nudge_min - array
 * @param nudge_max - array
 * @param scale - array
 * @return
 */
 __global__ void FakeQuantPerChannel(const float *input, float *output, const int total_size, const int channel_size,
                                    const float *nudge_min, const float *nudge_max, const float *scale) {
  float input_x = 0.f;
  int nudge_input = 0;
  int channel_idx = 0;
  int per_channel_num = total_size / channel_size;

  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < total_size; i += blockDim.x * gridDim.x) {
    input_x = input[i];
    channel_idx = floor(static_cast<double>(i) / static_cast<double>(per_channel_num));
    // clamp input x
    if (input_x < nudge_min[channel_idx]) {
      input_x = nudge_min[channel_idx];
    }
    if (input_x > nudge_max[channel_idx]) {
      input_x = nudge_max[channel_idx];
    }
    // clamp shift
    nudge_input = floor((input_x - nudge_min[channel_idx]) / scale[channel_idx] + 0.5f);

    // quantize
    output[i] = nudge_input * scale[channel_idx] + nudge_min[channel_idx];
  }
 }

 void CalFakeQuantPerChannel(const float *input, float *output, const int total_size, const int channel_size,
                            const float *nudge_min, const float *nudge_max, const float *scale,
                            cudaStream_t cuda_stream) {
  FakeQuantPerChannel<<<GET_BLOCKS(total_size), GET_THREADS, 0, cuda_stream>>>(input, output, total_size, channel_size,
                                                                               nudge_min, nudge_max, scale);
 }

 __global__ void FakeQuantPerChannelGrad(const float *input, const float *gradient, float *output, const int total_size,
                                        const int channel_size, const float *nudge_min, const float *nudge_max) {
  int channel_idx = 0;
  int per_channel_num = total_size / channel_size;

  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < total_size; i += blockDim.x * gridDim.x) {
    channel_idx = floor(static_cast<double>(i) / static_cast<double>(per_channel_num));
    if (input[i] < nudge_min[channel_idx] || input[i] > nudge_max[channel_idx]) {
      output[i] = 0;
    } else {
      output[i] = gradient[i];
    }
  }
 }

 void CalFakeQuantPerChannelGrad(const float *input, const float *gradient, float *output, const int total_num,
                                const int channel_num, const float *nudge_min, const float *nudge_max,
                                cudaStream_t cuda_stream) {
  FakeQuantPerChannelGrad<<<GET_BLOCKS(channel_num), GET_THREADS, 0, cuda_stream>>>(input, gradient, output, total_num,
                                                                                    channel_num, nudge_min, nudge_max);
 }
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include <thrust/extrema.h>
 #include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 #include <thrust/reduce.h>
 #include <thrust/pair.h>
 #include "fake_quant_perchannel_impl.cuh"

 /**
 * Find the nudge min, max and scale value as output.
 * @param input_min array
 * @param input_max array
 * @param quant_min 1 << bit -1
 * @param quant_max 0
 * @param nudge_min array
 * @param nudge_max array
 * @param scale array
 * @param channel_num
 * @return
 */
 __global__ void NudgeMinMaxPerChannel(float *input_min, float *input_max, const float quant_min, const float quant_max,
                                      float *nudge_min, float *nudge_max, float *scale, int channel_num,
                                      const bool symmetric) {
  float zp_from_min = 0.f;
  float nudge_zp = 0.f;

  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < channel_num; i += blockDim.x * gridDim.x) {
    if (symmetric) {
      input_max[i] = abs(input_min[i]) < input_max[i] ? input_max[i] : -input_min[i];
      input_min[i] = abs(input_min[i]) < input_max[i] ? -input_max[i] : input_min[i];
    }
    if ((quant_max - quant_min) == 0 || (input_max[i] - input_min[i]) == 0) {
      scale[i] = 0.f;
      zp_from_min = 0.f;
    } else {
      scale[i] = (input_max[i] - input_min[i]) / (quant_max - quant_min);
      zp_from_min = quant_min - input_min[i] / scale[i];
    }

    if (zp_from_min <= quant_min) {
      nudge_zp = quant_min;
    } else if (zp_from_min >= quant_max) {
      nudge_zp = quant_max;
    } else {
      nudge_zp = round(zp_from_min);
    }

    nudge_min[i] = (quant_min - nudge_zp) * (scale[i]);
    nudge_max[i] = (quant_max - nudge_zp) * (scale[i]);
  }
 }

 void CalNudgePerChannel(float *input_min, float *input_max, const float quant_min, const float quant_max,
                        float *nudge_min, float *nudge_max, float *scale, const int channel_num, const bool symmetric,
                        cudaStream_t cuda_stream) {
  NudgeMinMaxPerChannel<<<GET_BLOCKS(channel_num), GET_THREADS, 0, cuda_stream>>>(
    input_min, input_max, quant_min, quant_max, nudge_min, nudge_max, scale, channel_num, symmetric);
 }

 /**
 * Calculate fake quant output according by nudge min, nudge max, nudge scale.
 * @param input - array
 * @param output - array
 * @param total_size - int, purpose for cal the per channel number in filters
 * @param channel_size - int, purpose for cal the per channel number in filters
 * @param nudge_min - array
 * @param nudge_max - array
 * @param scale - array
 * @return
 */
 __global__ void FakeQuantPerChannel(const float *input, float *output, const int total_size, const int channel_size,
                                    const float *nudge_min, const float *nudge_max, const float *scale) {
  float input_x = 0.f;
  int nudge_input = 0;
  int channel_idx = 0;
  int per_channel_num = total_size / channel_size;

  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < total_size; i += blockDim.x * gridDim.x) {
    input_x = input[i];
    channel_idx = floor(static_cast<double>(i) / static_cast<double>(per_channel_num));
    // clamp input x
    if (input_x < nudge_min[channel_idx]) {
      input_x = nudge_min[channel_idx];
    }
    if (input_x > nudge_max[channel_idx]) {
      input_x = nudge_max[channel_idx];
    }
    // clamp shift
    nudge_input = floor((input_x - nudge_min[channel_idx]) / scale[channel_idx] + 0.5f);

    // quantize
    output[i] = nudge_input * scale[channel_idx] + nudge_min[channel_idx];
  }
 }

 void CalFakeQuantPerChannel(const float *input, float *output, const int total_size, const int channel_size,
                            const float *nudge_min, const float *nudge_max, const float *scale,
                            cudaStream_t cuda_stream) {
  FakeQuantPerChannel<<<GET_BLOCKS(total_size), GET_THREADS, 0, cuda_stream>>>(input, output, total_size, channel_size,
                                                                               nudge_min, nudge_max, scale);
 }

 __global__ void FakeQuantPerChannelGrad(const float *input, const float *gradient, float *output, const int total_size,
                                        const int channel_size, const float *nudge_min, const float *nudge_max) {
  int channel_idx = 0;
  int per_channel_num = total_size / channel_size;

  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < total_size; i += blockDim.x * gridDim.x) {
    channel_idx = floor(static_cast<double>(i) / static_cast<double>(per_channel_num));
    if (input[i] < nudge_min[channel_idx] || input[i] > nudge_max[channel_idx]) {
      output[i] = 0;
    } else {
      output[i] = gradient[i];
    }
  }
 }

 void CalFakeQuantPerChannelGrad(const float *input, const float *gradient, float *output, const int total_num,
                                const int channel_num, const float *nudge_min, const float *nudge_max,
                                cudaStream_t cuda_stream) {
  FakeQuantPerChannelGrad<<<GET_BLOCKS(channel_num), GET_THREADS, 0, cuda_stream>>>(input, gradient, output, total_num,
                                                                                    channel_num, nudge_min, nudge_max);
 }