Browse Source

!2097 review gpu quant code

Merge pull request !2097 from chenzhongming/master
tags/v0.5.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
4d34e49e8f
6 changed files with 6 additions and 20 deletions
  1. +1
    -6
      mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h
  2. +0
    -2
      mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h
  3. +0
    -2
      mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h
  4. +1
    -5
      mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h
  5. +3
    -3
      mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h
  6. +1
    -2
      mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.cc

+ 1
- 6
mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h View File

@@ -39,12 +39,10 @@ class BatchNormFold2GpuKernel : public GpuKernel {
~BatchNormFold2GpuKernel() override { DestroyResource(); }

const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }

const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

-bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
@@ -111,10 +109,7 @@ class BatchNormFold2GpuKernel : public GpuKernel {
input_size_list_.push_back(weight_size); // running_std
input_size_list_.push_back(weight_size); // running_mean
input_size_list_.push_back(sizeof(int32_t)); // global_step

output_size_list_.push_back(input_size);

workspace_size_list_.push_back(sizeof(int32_t));
}

private:


+ 0
- 2
mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h View File

@@ -39,9 +39,7 @@ class BatchNormFold2GradGpuKernel : public GpuKernel {
~BatchNormFold2GradGpuKernel() override { DestroyResource(); }

const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }

const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,


+ 0
- 2
mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h View File

@@ -47,9 +47,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
~BatchNormFoldGpuKernel() override { DestroyResource(); }

const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }

const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,


+ 1
- 5
mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h View File

@@ -46,9 +46,8 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

-bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
-(void)workspace;
// 'd_batch_mean', 'd_batch_std', 'x', 'batch_mean', 'batch_std', 'current_step'
T *d_batch_mean = GetDeviceAddress<T>(inputs, 0);
T *d_batch_std = GetDeviceAddress<T>(inputs, 1);
@@ -139,11 +138,8 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
input_size_list_.push_back(channel_size_);
input_size_list_.push_back(channel_size_);
input_size_list_.push_back(sizeof(int));

// 'dx'
output_size_list_.push_back(input_size_);

workspace_size_list_.push_back(workspace_size_);
}

private:


+ 3
- 3
mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h View File

@@ -33,7 +33,8 @@ class CorrectionMulGpuKernel : public GpuKernel {
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
-bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+
+bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto *weight = GetDeviceAddress<T>(inputs, 0);
auto *gamma = GetDeviceAddress<T>(inputs, 1);
@@ -74,10 +75,9 @@ class CorrectionMulGpuKernel : public GpuKernel {
input_size_list_.push_back(input_size); // weight
input_size_list_.push_back(weight_size); // gamma
input_size_list_.push_back(weight_size); // running_std
size_t workspace_size = 0;
output_size_list_.push_back(input_size);
workspace_size_list_.push_back(workspace_size);
}

void InitResource() override {}

private:


+ 1
- 2
mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.cc View File

@@ -101,10 +101,9 @@ void FakeQuantGradGpuKernel::InitSizeLists() {
input_size_list_.push_back(min_size_); // min
input_size_list_.push_back(max_size_); // max
output_size_list_.push_back(output_size_);
workspace_size_list_.push_back(workspace_size_);
}
-bool FakeQuantGradGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+bool FakeQuantGradGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
float *output = GetDeviceAddress<float>(outputs, 0);
float *gradient = GetDeviceAddress<float>(inputs, 0);


Loading…
Cancel
Save