

/**
 * \file dnn/src/cuda/relayout_format/relayout_format.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "src/cuda/relayout_format/relayout_format.cuh"
#include "src/cuda/relayout_format/relayout_format.h"
#include "src/common/utils.h"
#include "megdnn/dtype.h"

using namespace megdnn;
using namespace cuda;

namespace {
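
// Reads the quantization parameters out of a quantized DType. The asymmetric
// types (Quantized8Asymm / Quantized4Asymm) carry both a scale and a
// zero_point; the symmetric types (QuantizedS8 / QuantizedS4) only carry a
// scale, so zero_point keeps the caller-supplied default.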
inline void get_scale_zeropoint(const DType& tensor_dtype, float& scale,
                                uint8_t& zero_point) {
    if (tensor_dtype.enumv() == DTypeEnum::Quantized8Asymm) {
        zero_point = tensor_dtype.param<dtype::Quantized8Asymm>().zero_point;
        scale = tensor_dtype.param<dtype::Quantized8Asymm>().scale;
    } else if (tensor_dtype.enumv() == DTypeEnum::QuantizedS8) {
        scale = tensor_dtype.param<dtype::QuantizedS8>().scale;
    } else if (tensor_dtype.enumv() == DTypeEnum::QuantizedS4) {
        scale = tensor_dtype.param<dtype::QuantizedS4>().scale;
    } else if (tensor_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        zero_point = tensor_dtype.param<dtype::Quantized4Asymm>().zero_point;
        scale = tensor_dtype.param<dtype::Quantized4Asymm>().scale;
    }
}

}  // namespace
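
// The fast CUDA relayout path is usable only when both tensors are
// contiguous, the (src, dst) dtype pair is one of the supported int32 /
// int8 / int4 quantized combinations, and, for NCHW4_NCHW, src and dst share
// the same 8-bit quantized dtype.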
bool relayout_format::RelayoutFormatFast::usable(
        const TensorLayout& src_layout, const TensorLayout& dst_layout,
        const RelayoutFormat::Param::Mode& mode) {
    bool is_all_continue =
            src_layout.is_contiguous() && dst_layout.is_contiguous();
    bool is_all_int32 =
            (src_layout.dtype.enumv() == DTypeEnum::QuantizedS32 &&
             dst_layout.dtype.enumv() == DTypeEnum::QuantizedS32);
    bool is_all_int8 =
            (src_layout.dtype.enumv() == DTypeEnum::Uint8 &&
             dst_layout.dtype.enumv() == DTypeEnum::QuantizedS8) ||
            (src_layout.dtype.enumv() == DTypeEnum::Quantized8Asymm &&
             dst_layout.dtype.enumv() == DTypeEnum::QuantizedS8) ||
            (src_layout.dtype.enumv() == DTypeEnum::Quantized8Asymm &&
             dst_layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) ||
            (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8 &&
             dst_layout.dtype.enumv() == DTypeEnum::QuantizedS8);
    bool is_all_int4 =
            (src_layout.dtype.enumv() == DTypeEnum::QuantizedS4 &&
             dst_layout.dtype.enumv() == DTypeEnum::QuantizedS4) ||
            (src_layout.dtype.enumv() == DTypeEnum::Quantized4Asymm &&
             dst_layout.dtype.enumv() == DTypeEnum::Quantized4Asymm);
    bool is_nchw4_nchw_ok = true;
    if (mode == RelayoutFormat::Param::Mode::NCHW4_NCHW) {
        is_nchw4_nchw_ok =
                (src_layout.dtype.enumv() == DTypeEnum::Quantized8Asymm ||
                 src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) &&
                src_layout.dtype == dst_layout.dtype;
    }
    return is_all_continue && (is_all_int32 || is_all_int8 || is_all_int4) &&
           is_nchw4_nchw_ok;
}
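
// Extracts the per-tensor quantization parameters, forces the zero point of
// plain Uint8 sources to 128 (treating them as asymmetric data centered at
// the midpoint), and then dispatches to the CUDA kernel that matches the
// requested relayout mode; unsupported modes raise megdnn_throw.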
void relayout_format::RelayoutFormatFast::exec(const TensorND& src,
                                               const TensorND& dst,
                                               cudaStream_t stream,
                                               RelayoutFormat::Param::Mode mode,
                                               int group) {
    float src_scale = 1.f;
    float dst_scale = 1.f;
    uint8_t src_zero_point = 0;
    uint8_t dst_zero_point = 0;
    get_scale_zeropoint(src.layout.dtype, src_scale, src_zero_point);
    get_scale_zeropoint(dst.layout.dtype, dst_scale, dst_zero_point);
    if (src.layout.dtype.enumv() == DTypeEnum::Uint8) {
        src_zero_point = 128;
    }
    if (mode == RelayoutFormat::Param::Mode::NCHW_NCHW4 ||
        mode == RelayoutFormat::Param::Mode::NCHW_NCHW64) {
        return relayout_format_cuda_nchw_nchwx(src, dst, stream, src_scale,
                                               dst_scale, src_zero_point,
                                               dst_zero_point, group);
    } else if (mode == RelayoutFormat::Param::Mode::NCHW64_NCHW) {
        megdnn_assert(group == 1,
                      "RelayoutFormat kernel only support transforming NCHW64 "
                      "to NCHW with group = 1(group:%d)",
                      group);
        return relayout_format_cuda_nchwx_nchw(src, dst, stream, src_scale,
                                               dst_scale, src_zero_point,
                                               dst_zero_point);
    } else if (mode == RelayoutFormat::Param::Mode::NCHW_NHWC) {
#define CHECK(dt)                                                   \
    megdnn_assert(dt.enumv() == DTypeEnum::Quantized4Asymm ||       \
                  dt.enumv() == DTypeEnum::QuantizedS4)
        CHECK(src.layout.dtype);
        CHECK(dst.layout.dtype);
        return relayout_format_cuda_nchw_nhwc(src, dst, stream, src_scale,
                                              dst_scale, src_zero_point,
                                              dst_zero_point);
    } else if (mode == RelayoutFormat::Param::Mode::NHWC_NCHW) {
        CHECK(src.layout.dtype);
        CHECK(dst.layout.dtype);
        return relayout_format_cuda_nhwc_nchw(src, dst, stream, src_scale,
                                              dst_scale, src_zero_point,
                                              dst_zero_point);
#undef CHECK
    } else if (mode == RelayoutFormat::Param::Mode::NCHW_NCHW4_WEIGHT) {
        return relayout_format_cuda_nchw_nchw4_weight(src, dst, stream);
    } else if (mode == RelayoutFormat::Param::Mode::NCHW4_NCHW) {
        return relayout_format_cuda_nchw4_nchw(src, dst, stream, group);
    } else {
        megdnn_throw(
                "only support nchw_nchw64/nchw64_nchw/nchw_nchw4/nchw4_nchw "
                "layout_format");
    }
}
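
// A minimal usage sketch, not part of the original file: it assumes the
// caller has already prepared contiguous quantized `src`/`dst` TensorNDs, a
// valid cudaStream_t `stream`, and the desired `mode`/`group`, and it checks
// usable() before taking the fast path.
//
//     using Fast = relayout_format::RelayoutFormatFast;
//     if (Fast::usable(src.layout, dst.layout, mode)) {
//         Fast::exec(src, dst, stream, mode, group);
//     }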
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
