You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

tile.cpp 3.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
#include "tile.h"

#include <string.h>
  15. namespace ncnn {
  16. Tile::Tile()
  17. {
  18. one_blob_only = true;
  19. support_inplace = false;
  20. }
  21. int Tile::load_param(const ParamDict& pd)
  22. {
  23. dim = pd.get(0, 0);
  24. tiles = pd.get(1, 1);
  25. return 0;
  26. }
  27. int Tile::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
  28. {
  29. int w = bottom_blob.w;
  30. int h = bottom_blob.h;
  31. int channels = bottom_blob.c;
  32. size_t elemsize = bottom_blob.elemsize;
  33. if (dim == 0)
  34. {
  35. top_blob.create(w, h, channels * tiles, elemsize, opt.blob_allocator);
  36. if (top_blob.empty())
  37. return -100;
  38. const float* ptr = bottom_blob;
  39. int size = bottom_blob.cstep * channels;
  40. #pragma omp parallel for num_threads(opt.num_threads)
  41. for (int p = 0; p < tiles; p++)
  42. {
  43. float* outptr = top_blob.channel(p * channels);
  44. for (int i = 0; i < size; i++)
  45. {
  46. outptr[i] = ptr[i];
  47. }
  48. }
  49. }
  50. else if (dim == 1)
  51. {
  52. top_blob.create(w, h * tiles, channels, elemsize, opt.blob_allocator);
  53. if (top_blob.empty())
  54. return -100;
  55. int size = w * h;
  56. #pragma omp parallel for num_threads(opt.num_threads)
  57. for (int q = 0; q < channels; q++)
  58. {
  59. const float* ptr = bottom_blob.channel(q);
  60. float* outptr = top_blob.channel(q);
  61. for (int p = 0; p < tiles; p++)
  62. {
  63. for (int i = 0; i < size; i++)
  64. {
  65. outptr[i] = ptr[i];
  66. }
  67. outptr += size;
  68. }
  69. }
  70. }
  71. else if (dim == 2)
  72. {
  73. top_blob.create(w * tiles, h, channels, elemsize, opt.blob_allocator);
  74. if (top_blob.empty())
  75. return -100;
  76. #pragma omp parallel for num_threads(opt.num_threads)
  77. for (int q = 0; q < channels; q++)
  78. {
  79. const float* ptr = bottom_blob.channel(q);
  80. float* outptr = top_blob.channel(q);
  81. for (int i = 0; i < h; i++)
  82. {
  83. for (int p = 0; p < tiles; p++)
  84. {
  85. for (int j = 0; j < w; j++)
  86. {
  87. outptr[j] = ptr[j];
  88. }
  89. outptr += w;
  90. }
  91. ptr += w;
  92. }
  93. }
  94. }
  95. return 0;
  96. }
  97. } // namespace ncnn