You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

lowbit_utils.cpp 2.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. /**
  2. * \file dnn/src/naive/lowbit_utils.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "src/naive/lowbit_utils.h"
  12. // =================================quint4======================================
  13. void megdnn::naive::uint4_to_uint8(const TensorND& in, const TensorND& out) {
  14. auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
  15. auto out_ptr = out.compatible_ptr<uint8_t>() + out.layout.span().low_byte;
  16. for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) {
  17. uint8_t val = in_ptr[i / 2];
  18. out_ptr[i] = val & 0xF;
  19. out_ptr[i + 1] = (val >> 4) & 0xF;
  20. }
  21. }
  22. void megdnn::naive::uint8_to_uint4(const TensorND& in, const TensorND& out) {
  23. auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
  24. auto out_ptr =
  25. static_cast<uint8_t*>(out.raw_ptr) + out.layout.span().low_byte;
  26. for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) {
  27. uint8_t a = in_ptr[i], b = in_ptr[i + 1];
  28. a = std::min(a, DTypeTrait<dtype::Quantized4Asymm>::max());
  29. b = std::min(b, DTypeTrait<dtype::Quantized4Asymm>::max());
  30. out_ptr[i / 2] = a + (b << 4);
  31. }
  32. }
  33. // ==================================qint4======================================
  34. void megdnn::naive::int4_to_int8(const TensorND& in, const TensorND& out) {
  35. auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
  36. auto out_ptr =
  37. static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
  38. for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) {
  39. int8_t cur = in_ptr[i / 2];
  40. out_ptr[i] = cur << 4;
  41. out_ptr[i] = out_ptr[i] >> 4;
  42. out_ptr[i + 1] = cur >> 4;
  43. }
  44. }
  45. void megdnn::naive::int8_to_int4(const TensorND& in, const TensorND& out) {
  46. auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
  47. auto out_ptr =
  48. static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
  49. for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) {
  50. int8_t a = in_ptr[i], b = in_ptr[i + 1];
  51. a = std::min(a, DTypeTrait<dtype::QuantizedS4>::max());
  52. a = std::max(a, DTypeTrait<dtype::QuantizedS4>::min());
  53. b = std::min(b, DTypeTrait<dtype::QuantizedS4>::max());
  54. b = std::max(b, DTypeTrait<dtype::QuantizedS4>::min());
  55. out_ptr[i / 2] = (a & 0xF) | (b << 4);
  56. }
  57. }

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台

Contributors (1)