You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

grad_quant_ops.py 8.5 kB

5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Generate bprop for quantization aware ops"""
  16. from .. import operations as P
  17. from ..operations import _quant_ops as Q
  18. from .grad_base import bprop_getters
  19. from ..composite.multitype_ops.zeros_like_impl import zeros_like
  20. from ... import context
  21. @bprop_getters.register(Q.FakeQuantPerLayer)
  22. def get_bprop_fakequant_with_minmax(self):
  23. """Generate bprop for FakeQuantPerLayer for GPU and Ascend"""
  24. op = Q.FakeQuantPerLayerGrad(
  25. num_bits=self.num_bits, quant_delay=self.quant_delay)
  26. def bprop(x, x_min, x_max, out, dout):
  27. dx = op(dout, x, x_min, x_max)
  28. return dx, zeros_like(x_min), zeros_like(x_max)
  29. return bprop
  30. @bprop_getters.register(Q.FakeQuantWithMinMaxVars)
  31. def get_bprop_fakequant_with_minmax_vars(self):
  32. """Generate bprop for FakeQuantWithMinMaxVars for Ascend"""
  33. op = Q.FakeQuantWithMinMaxVarsGradient(
  34. num_bits=self.num_bits, narrow_range=self.narrow_range)
  35. def bprop(x, x_min, x_max, out, dout):
  36. dx = op(dout, x, x_min, x_max)
  37. return dx, zeros_like(x_min), zeros_like(x_max)
  38. return bprop
  39. @bprop_getters.register(Q.FakeQuantWithMinMaxVarsPerChannel)
  40. def get_bprop_fakequant_with_minmax_vars_perchannel(self):
  41. """Generate bprop for FakeQuantWithMinMaxVarsPerChannel for Ascend"""
  42. op = Q.FakeQuantWithMinMaxVarsPerChannelGradient(
  43. num_bits=self.num_bits, narrow_range=self.narrow_range)
  44. def bprop(x, x_min, x_max, out, dout):
  45. dx = op(dout, x, x_min, x_max)
  46. return dx, zeros_like(x_min), zeros_like(x_max)
  47. return bprop
  48. @bprop_getters.register(Q.FakeQuantPerChannel)
  49. def get_bprop_fakequant_with_minmax_perchannel(self):
  50. """Generate bprop for FakeQuantPerChannel"""
  51. op = Q.FakeQuantPerChannelGrad(num_bits=self.num_bits,
  52. quant_delay=self.quant_delay,
  53. symmetric=self.symmetric,
  54. narrow_range=self.symmetric,
  55. channel_axis=self.channel_axis)
  56. def bprop(x, x_min, x_max, out, dout):
  57. dx = op(dout, x, x_min, x_max)
  58. return dx, zeros_like(x_min), zeros_like(x_max)
  59. return bprop
  60. @bprop_getters.register(Q.BatchNormFold)
  61. def get_bprop_batchnorm_fold(self):
  62. """Generate bprop for BatchNormFold for GPU"""
  63. op = Q.BatchNormFoldGrad(self.epsilon, self.is_training, self.freeze_bn)
  64. def bprop(x, mean, variance, global_step, out, dout):
  65. dx = op(dout[0], dout[1], x, out[0], out[1], global_step)
  66. return dx, zeros_like(mean), zeros_like(variance), zeros_like(global_step)
  67. return bprop
  68. @bprop_getters.register(Q.CorrectionMul)
  69. def get_bprop_correction_mul(self):
  70. """Generate bprop for CorrectionMul for Ascend and GPU"""
  71. grad_dx = Q.CorrectionMulGrad(self.channel_axis)
  72. grad_d_batch_std = Q.CorrectionMulGradReduce(self.channel_axis)
  73. def bprop(x, batch_std, running_std, out, dout):
  74. dx, d_batch_std = grad_dx(dout, x, batch_std, running_std)
  75. return dx, d_batch_std, zeros_like(running_std)
  76. def bprop_npu(x, batch_std, running_std, out, dout):
  77. dx, mul_dx = grad_dx(dout, x, batch_std, running_std)
  78. d_batch_std = grad_d_batch_std(mul_dx)
  79. return dx, d_batch_std, zeros_like(running_std)
  80. if context.get_context('device_target') == "Ascend":
  81. return bprop_npu
  82. return bprop
  83. @bprop_getters.register(Q.BatchNormFold2)
  84. def get_bprop_batchnorm_fold2(self):
  85. """Generate bprop for BatchNormFold2 for GPU"""
  86. op_f = Q.BatchNormFold2Grad(freeze_bn=self.freeze_bn)
  87. def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout):
  88. d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(dout, x, gamma, batch_std, batch_mean, running_std,
  89. running_mean, global_step)
  90. return d_x, d_beta, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std), zeros_like(running_mean), \
  91. zeros_like(global_step)
  92. return bprop
  93. @bprop_getters.register(Q.BatchNormFoldD)
  94. def get_bprop_batchnormfold(self):
  95. """Generate bprop for BatchNormFold for Ascend"""
  96. op = Q.BatchNormFoldGradD(self.epsilon, self.is_training, self.freeze_bn)
  97. def bprop(x, x_sum, x_square_sum, mean, variance, out, dout):
  98. dx = op(dout[1], dout[2], x, out[1], out[2])
  99. return dx, zeros_like(x_sum), zeros_like(x_square_sum), zeros_like(mean), zeros_like(variance)
  100. return bprop
  101. @bprop_getters.register(P.BNTrainingReduce)
  102. def get_bprop_bn_training_reduce(self):
  103. """Generate bprop for BNTrainingReduce for Ascend"""
  104. def bprop(x, out, dout):
  105. return (zeros_like(x),)
  106. return bprop
  107. @bprop_getters.register(Q.BatchNormFold2D)
  108. def get_bprop_batchnorm_fold2_(self):
  109. """Generate bprop for BatchNormFold2 for Ascend"""
  110. op_reduce = Q.BatchNormFold2GradReduce(freeze_bn=self.freeze_bn)
  111. op_f = Q.BatchNormFold2GradD(freeze_bn=self.freeze_bn)
  112. def bprop(x, beta, gamma, batch_std, batch_mean, running_std, out, dout):
  113. dout_reduce, dout_x_reduce = op_reduce(dout, x)
  114. d_batch_std, d_batch_mean, d_gamma, d_x = op_f(dout, dout_reduce, dout_x_reduce, gamma, batch_std,
  115. batch_mean, running_std)
  116. return d_x, dout_reduce, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std)
  117. return bprop
  118. @bprop_getters.register(Q.MinMaxUpdatePerLayer)
  119. def get_bprop_fakequant_with_minmax_per_layer_update(self):
  120. """Generate bprop for MinMaxUpdatePerLayer for Ascend"""
  121. def bprop(x, x_min, x_max, out, dout):
  122. return zeros_like(x), zeros_like(x_min), zeros_like(x_max)
  123. return bprop
  124. @bprop_getters.register(Q.MinMaxUpdatePerChannel)
  125. def get_bprop_fakequant_with_minmax_per_channel_update(self):
  126. """Generate bprop for MinMaxUpdatePerChannel for Ascend"""
  127. def bprop(x, x_min, x_max, out, dout):
  128. return zeros_like(x), zeros_like(x_min), zeros_like(x_max)
  129. return bprop
  130. @bprop_getters.register(Q.ActsULQ)
  131. def get_bprop_acts_ulq(self):
  132. """Grad definition for 'ActsULQ' operation"""
  133. op = Q.ActsULQInputGrad()
  134. op1 = Q.ActULQClampMinGrad()
  135. op2 = Q.ActULQClampMaxGrad()
  136. def bprop(x, clamp_min, clamp_max, out, dout):
  137. dx = op(dout[0], out[1], out[2])
  138. dx1 = op1(dout[0], out[1], out[3])
  139. dx2 = op2(dout[0], out[2], out[3])
  140. return (dx, dx1, dx2)
  141. return bprop
  142. @bprop_getters.register(Q.WtsARQ)
  143. def get_bprop_wts_arq(self):
  144. """Grad definition for 'WtsArq' operation"""
  145. def bprop(w, w_min, w_max, out, dout):
  146. return (dout, zeros_like(w_min), zeros_like(w_max))
  147. return bprop
  148. @bprop_getters.register(Q.FakeLearnedScaleQuantPerLayer)
  149. def get_bprop_fakequant_with_learned_scale_perlayer(self):
  150. """Generate bprop for FakeLearnedScaleQuantPerLayer for GPU"""
  151. op = Q.FakeLearnedScaleQuantPerLayerGrad(quant_delay=self.quant_delay,
  152. neg_trunc=self.neg_trunc)
  153. def bprop(x, x_alpha, x_quant_max, out, dout):
  154. dx, dalpha = op(dout, x, x_alpha, x_quant_max)
  155. return dx, dalpha, zeros_like(x_quant_max)
  156. return bprop
  157. @bprop_getters.register(Q.FakeLearnedScaleQuantPerChannel)
  158. def get_bprop_fakequant_with_learned_scale_perchannel(self):
  159. """Generate bprop for FakeLearnedScaleQuantPerChannel for GPU"""
  160. op = Q.FakeLearnedScaleQuantPerChannelGrad(quant_delay=self.quant_delay,
  161. neg_trunc=self.neg_trunc,
  162. channel_axis=self.channel_axis)
  163. def bprop(x, x_alpha, x_quant_max, out, dout):
  164. dx, dalpha = op(dout, x, x_alpha, x_quant_max)
  165. return dx, dalpha, zeros_like(x_quant_max)
  166. return bprop