From d7cbc055f339529d54c43379e742db3dd0cd521e Mon Sep 17 00:00:00 2001 From: nihui Date: Wed, 7 Apr 2021 11:08:55 +0800 Subject: [PATCH] fix illegal instruction on pi4 when NCNN_ARM82 enabled The compiler may compile inline member functions as noinline blocks for different architectures, and the linker may pick the one built for the newer arch, which results in illegal instructions on old hardware --- src/mat.cpp | 920 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/mat.h | 927 +--------------------------------------------------- 2 files changed, 923 insertions(+), 924 deletions(-) diff --git a/src/mat.cpp b/src/mat.cpp index 07c13a388..1e92f8039 100644 --- a/src/mat.cpp +++ b/src/mat.cpp @@ -33,6 +33,926 @@ namespace ncnn { +Mat& Mat::operator=(const Mat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +Mat Mat::clone(Allocator* _allocator) const +{ + if (empty()) + return Mat(); + + Mat m; + if (dims == 1) + m.create(w, elemsize, elempack, _allocator); + else if (dims == 2) + m.create(w, h, elemsize, elempack, _allocator); + else if (dims == 3) + m.create(w, h, c, elemsize, elempack, _allocator); + + if (total() > 0) + { + memcpy(m.data, data, total() * elemsize); + } + + return m; +} + +void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator) +{ + *this = mat.clone(allocator); +} + +Mat Mat::reshape(int _w, Allocator* _allocator) const +{ + if (w * h * c != _w) + return Mat(); + + if (dims == 3 && cstep != (size_t)w * h) + { + Mat m; + m.create(_w, elemsize, elempack, _allocator); + + // flatten + for (int i = 0; i < c; i++) + { + const void* ptr = (unsigned char*)data + i * cstep * elemsize; + void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize; + memcpy(mptr, ptr, (size_t)w * h * elemsize); + 
} + + return m; + } + + Mat m = *this; + + m.dims = 1; + m.w = _w; + m.h = 1; + m.c = 1; + + m.cstep = _w; + + return m; +} + +Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const +{ + if (w * h * c != _w * _h) + return Mat(); + + if (dims == 3 && cstep != (size_t)w * h) + { + Mat m; + m.create(_w, _h, elemsize, elempack, _allocator); + + // flatten + for (int i = 0; i < c; i++) + { + const void* ptr = (unsigned char*)data + i * cstep * elemsize; + void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize; + memcpy(mptr, ptr, (size_t)w * h * elemsize); + } + + return m; + } + + Mat m = *this; + + m.dims = 2; + m.w = _w; + m.h = _h; + m.c = 1; + + m.cstep = (size_t)_w * _h; + + return m; +} + +Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const +{ + if (w * h * c != _w * _h * _c) + return Mat(); + + if (dims < 3) + { + if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize) + { + Mat m; + m.create(_w, _h, _c, elemsize, elempack, _allocator); + + // align channel + for (int i = 0; i < _c; i++) + { + const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize; + void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize; + memcpy(mptr, ptr, (size_t)_w * _h * elemsize); + } + + return m; + } + } + else if (c != _c) + { + // flatten and then align + Mat tmp = reshape(_w * _h * _c, _allocator); + return tmp.reshape(_w, _h, _c, _allocator); + } + + Mat m = *this; + + m.dims = 3; + m.w = _w; + m.h = _h; + m.c = _c; + + m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize; + + return m; +} + +void Mat::create(int _w, size_t _elemsize, Allocator* _allocator) +{ + if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 1; + w = _w; + h = 1; + c = 1; + + cstep = w; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + if 
(allocator) + data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); + else + data = fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((unsigned char*)data) + totalsize); + *refcount = 1; + } +} + +void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator) +{ + if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 2; + w = _w; + h = _h; + c = 1; + + cstep = (size_t)w * h; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + if (allocator) + data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); + else + data = fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((unsigned char*)data) + totalsize); + *refcount = 1; + } +} + +void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator) +{ + if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 3; + w = _w; + h = _h; + c = _c; + + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + if (allocator) + data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); + else + data = fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((unsigned char*)data) + totalsize); + *refcount = 1; + } +} + +void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator) +{ + if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 1; + w = _w; + h = 1; + c = 1; + + cstep = w; + + if (total() > 0) + { + size_t totalsize = 
alignSize(total() * elemsize, 4); + if (allocator) + data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); + else + data = fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((unsigned char*)data) + totalsize); + *refcount = 1; + } +} + +void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator) +{ + if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 2; + w = _w; + h = _h; + c = 1; + + cstep = (size_t)w * h; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + if (allocator) + data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); + else + data = fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((unsigned char*)data) + totalsize); + *refcount = 1; + } +} + +void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) +{ + if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 3; + w = _w; + h = _h; + c = _c; + + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + if (allocator) + data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); + else + data = fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((unsigned char*)data) + totalsize); + *refcount = 1; + } +} + +void Mat::create_like(const Mat& m, Allocator* _allocator) +{ + int _dims = m.dims; + if (_dims == 1) + create(m.w, m.elemsize, m.elempack, _allocator); + if (_dims == 2) + create(m.w, m.h, m.elemsize, m.elempack, _allocator); + if (_dims == 3) + create(m.w, m.h, m.c, m.elemsize, 
m.elempack, _allocator); +} + +#if NCNN_VULKAN +void Mat::create_like(const VkMat& m, Allocator* _allocator) +{ + int _dims = m.dims; + if (_dims == 1) + create(m.w, m.elemsize, m.elempack, _allocator); + if (_dims == 2) + create(m.w, m.h, m.elemsize, m.elempack, _allocator); + if (_dims == 3) + create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); +} + +void Mat::create_like(const VkImageMat& im, Allocator* _allocator) +{ + int _dims = im.dims; + if (_dims == 1) + create(im.w, im.elemsize, im.elempack, _allocator); + if (_dims == 2) + create(im.w, im.h, im.elemsize, im.elempack, _allocator); + if (_dims == 3) + create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); +} +#endif // NCNN_VULKAN + +void Mat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +void Mat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator) + allocator->fastFree(data); + else + fastFree(data); + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +#if NCNN_VULKAN +VkMat& VkMat::operator=(const VkMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator) +{ + if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 1; + w = _w; + h = 1; + c = 1; + + cstep = w; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + + data = allocator->fastMalloc(totalsize); + + refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); + *refcount = 1; + } +} + +void 
VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) +{ + if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 2; + w = _w; + h = _h; + c = 1; + + cstep = w * h; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + + data = allocator->fastMalloc(totalsize); + + refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); + *refcount = 1; + } +} + +void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) +{ + if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 3; + w = _w; + h = _h; + c = _c; + + cstep = alignSize(w * h * elemsize, 16) / elemsize; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + + data = allocator->fastMalloc(totalsize); + + refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); + *refcount = 1; + } +} + +void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) +{ + if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 1; + w = _w; + h = 1; + c = 1; + + cstep = w; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + + data = allocator->fastMalloc(totalsize); + + refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); + *refcount = 1; + } +} + +void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) +{ + if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == 
_allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 2; + w = _w; + h = _h; + c = 1; + + cstep = w * h; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + + data = allocator->fastMalloc(totalsize); + + refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); + *refcount = 1; + } +} + +void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) +{ + if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 3; + w = _w; + h = _h; + c = _c; + + cstep = alignSize(w * h * elemsize, 16) / elemsize; + + if (total() > 0) + { + size_t totalsize = alignSize(total() * elemsize, 4); + + data = allocator->fastMalloc(totalsize); + + refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); + *refcount = 1; + } +} + +void VkMat::create_like(const Mat& m, VkAllocator* _allocator) +{ + int _dims = m.dims; + if (_dims == 1) + create(m.w, m.elemsize, m.elempack, _allocator); + if (_dims == 2) + create(m.w, m.h, m.elemsize, m.elempack, _allocator); + if (_dims == 3) + create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); +} + +void VkMat::create_like(const VkMat& m, VkAllocator* _allocator) +{ + int _dims = m.dims; + if (_dims == 1) + create(m.w, m.elemsize, m.elempack, _allocator); + if (_dims == 2) + create(m.w, m.h, m.elemsize, m.elempack, _allocator); + if (_dims == 3) + create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); +} + +void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator) +{ + int _dims = im.dims; + if (_dims == 1) + create(im.w, im.elemsize, im.elempack, _allocator); + if (_dims == 2) + create(im.w, im.h, im.elemsize, im.elempack, _allocator); + if (_dims == 3) + create(im.w, im.h, 
im.c, im.elemsize, im.elempack, _allocator); +} + +void VkMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +void VkMat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +VkImageMat& VkImageMat::operator=(const VkImageMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + c = m.c; + + return *this; +} + +void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator) +{ + if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 1; + w = _w; + h = 1; + c = 1; + + if (total() > 0) + { + data = allocator->fastMalloc(w, h, c, elemsize, elempack); + if (!data) + return; + + refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); + *refcount = 1; + } +} + +void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) +{ + if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 2; + w = _w; + h = _h; + c = 1; + + if (total() > 0) + { + data = allocator->fastMalloc(w, h, c, elemsize, elempack); + if (!data) + return; + + refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); + *refcount = 1; + } +} + +void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) +{ + if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack 
== 1 && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = 1; + allocator = _allocator; + + dims = 3; + w = _w; + h = _h; + c = _c; + + if (total() > 0) + { + data = allocator->fastMalloc(w, h, c, elemsize, elempack); + if (!data) + return; + + refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); + *refcount = 1; + } +} + +void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) +{ + if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 1; + w = _w; + h = 1; + c = 1; + + if (total() > 0) + { + data = allocator->fastMalloc(w, h, c, elemsize, elempack); + if (!data) + return; + + refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); + *refcount = 1; + } +} + +void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) +{ + if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 2; + w = _w; + h = _h; + c = 1; + + if (total() > 0) + { + data = allocator->fastMalloc(w, h, c, elemsize, elempack); + if (!data) + return; + + refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); + *refcount = 1; + } +} + +void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) +{ + if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) + return; + + release(); + + elemsize = _elemsize; + elempack = _elempack; + allocator = _allocator; + + dims = 3; + w = _w; + h = _h; + c = _c; + + if (total() > 0) + { + data = allocator->fastMalloc(w, h, c, elemsize, elempack); + if 
(!data) + return; + + refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); + *refcount = 1; + } +} + +void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator) +{ + int _dims = m.dims; + if (_dims == 1) + create(m.w, m.elemsize, m.elempack, _allocator); + if (_dims == 2) + create(m.w, m.h, m.elemsize, m.elempack, _allocator); + if (_dims == 3) + create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); +} + +void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator) +{ + int _dims = m.dims; + if (_dims == 1) + create(m.w, m.elemsize, m.elempack, _allocator); + if (_dims == 2) + create(m.w, m.h, m.elemsize, m.elempack, _allocator); + if (_dims == 3) + create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); +} + +void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator) +{ + int _dims = im.dims; + if (_dims == 1) + create(im.w, im.elemsize, im.elempack, _allocator); + if (_dims == 2) + create(im.w, im.h, im.elemsize, im.elempack, _allocator); + if (_dims == 3) + create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); +} + +void VkImageMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +void VkImageMat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + c = 0; + + refcount = 0; +} +#endif // NCNN_VULKAN + void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals) { Layer* op; diff --git a/src/mat.h b/src/mat.h index 8eb833853..29b3db49c 100644 --- a/src/mat.h +++ b/src/mat.h @@ -707,8 +707,7 @@ inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocat inline Mat::Mat(const Mat& m) : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c), cstep(m.cstep) { - if (refcount) - NCNN_XADD(refcount, 
1); + addref(); } inline Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator) @@ -752,32 +751,6 @@ inline Mat::~Mat() release(); } -inline Mat& Mat::operator=(const Mat& m) -{ - if (this == &m) - return *this; - - if (m.refcount) - NCNN_XADD(m.refcount, 1); - - release(); - - data = m.data; - refcount = m.refcount; - elemsize = m.elemsize; - elempack = m.elempack; - allocator = m.allocator; - - dims = m.dims; - w = m.w; - h = m.h; - c = m.c; - - cstep = m.cstep; - - return *this; -} - inline void Mat::fill(float _v) { int size = (int)total(); @@ -961,386 +934,6 @@ inline void Mat::fill(T _v) } } -inline Mat Mat::clone(Allocator* _allocator) const -{ - if (empty()) - return Mat(); - - Mat m; - if (dims == 1) - m.create(w, elemsize, elempack, _allocator); - else if (dims == 2) - m.create(w, h, elemsize, elempack, _allocator); - else if (dims == 3) - m.create(w, h, c, elemsize, elempack, _allocator); - - if (total() > 0) - { - memcpy(m.data, data, total() * elemsize); - } - - return m; -} - -inline void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator) -{ - *this = mat.clone(allocator); -} - -inline Mat Mat::reshape(int _w, Allocator* _allocator) const -{ - if (w * h * c != _w) - return Mat(); - - if (dims == 3 && cstep != (size_t)w * h) - { - Mat m; - m.create(_w, elemsize, elempack, _allocator); - - // flatten - for (int i = 0; i < c; i++) - { - const void* ptr = (unsigned char*)data + i * cstep * elemsize; - void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize; - memcpy(mptr, ptr, (size_t)w * h * elemsize); - } - - return m; - } - - Mat m = *this; - - m.dims = 1; - m.w = _w; - m.h = 1; - m.c = 1; - - m.cstep = _w; - - return m; -} - -inline Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const -{ - if (w * h * c != _w * _h) - return Mat(); - - if (dims == 3 && cstep != (size_t)w * h) - { - Mat m; - m.create(_w, _h, elemsize, elempack, _allocator); - - // flatten - for (int i = 0; i < c; i++) - { - const void* ptr = 
(unsigned char*)data + i * cstep * elemsize; - void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize; - memcpy(mptr, ptr, (size_t)w * h * elemsize); - } - - return m; - } - - Mat m = *this; - - m.dims = 2; - m.w = _w; - m.h = _h; - m.c = 1; - - m.cstep = (size_t)_w * _h; - - return m; -} - -inline Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const -{ - if (w * h * c != _w * _h * _c) - return Mat(); - - if (dims < 3) - { - if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize) - { - Mat m; - m.create(_w, _h, _c, elemsize, elempack, _allocator); - - // align channel - for (int i = 0; i < _c; i++) - { - const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize; - void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize; - memcpy(mptr, ptr, (size_t)_w * _h * elemsize); - } - - return m; - } - } - else if (c != _c) - { - // flatten and then align - Mat tmp = reshape(_w * _h * _c, _allocator); - return tmp.reshape(_w, _h, _c, _allocator); - } - - Mat m = *this; - - m.dims = 3; - m.w = _w; - m.h = _h; - m.c = _c; - - m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize; - - return m; -} - -inline void Mat::create(int _w, size_t _elemsize, Allocator* _allocator) -{ - if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 1; - w = _w; - h = 1; - c = 1; - - cstep = w; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - if (allocator) - data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); - else - data = fastMalloc(totalsize + (int)sizeof(*refcount)); - refcount = (int*)(((unsigned char*)data) + totalsize); - *refcount = 1; - } -} - -inline void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator) -{ - if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == 
_allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 2; - w = _w; - h = _h; - c = 1; - - cstep = (size_t)w * h; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - if (allocator) - data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); - else - data = fastMalloc(totalsize + (int)sizeof(*refcount)); - refcount = (int*)(((unsigned char*)data) + totalsize); - *refcount = 1; - } -} - -inline void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator) -{ - if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 3; - w = _w; - h = _h; - c = _c; - - cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - if (allocator) - data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); - else - data = fastMalloc(totalsize + (int)sizeof(*refcount)); - refcount = (int*)(((unsigned char*)data) + totalsize); - *refcount = 1; - } -} - -inline void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator) -{ - if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 1; - w = _w; - h = 1; - c = 1; - - cstep = w; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - if (allocator) - data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); - else - data = fastMalloc(totalsize + (int)sizeof(*refcount)); - refcount = (int*)(((unsigned char*)data) + totalsize); - *refcount = 1; - } -} - -inline void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator) -{ - if (dims == 2 && w 
== _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 2; - w = _w; - h = _h; - c = 1; - - cstep = (size_t)w * h; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - if (allocator) - data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); - else - data = fastMalloc(totalsize + (int)sizeof(*refcount)); - refcount = (int*)(((unsigned char*)data) + totalsize); - *refcount = 1; - } -} - -inline void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) -{ - if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 3; - w = _w; - h = _h; - c = _c; - - cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - if (allocator) - data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); - else - data = fastMalloc(totalsize + (int)sizeof(*refcount)); - refcount = (int*)(((unsigned char*)data) + totalsize); - *refcount = 1; - } -} - -inline void Mat::create_like(const Mat& m, Allocator* _allocator) -{ - int _dims = m.dims; - if (_dims == 1) - create(m.w, m.elemsize, m.elempack, _allocator); - if (_dims == 2) - create(m.w, m.h, m.elemsize, m.elempack, _allocator); - if (_dims == 3) - create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); -} - -#if NCNN_VULKAN -inline void Mat::create_like(const VkMat& m, Allocator* _allocator) -{ - int _dims = m.dims; - if (_dims == 1) - create(m.w, m.elemsize, m.elempack, _allocator); - if (_dims == 2) - create(m.w, m.h, m.elemsize, m.elempack, _allocator); - if (_dims == 3) - create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); -} - -inline 
void Mat::create_like(const VkImageMat& im, Allocator* _allocator) -{ - int _dims = im.dims; - if (_dims == 1) - create(im.w, im.elemsize, im.elempack, _allocator); - if (_dims == 2) - create(im.w, im.h, im.elemsize, im.elempack, _allocator); - if (_dims == 3) - create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); -} -#endif // NCNN_VULKAN - -inline void Mat::addref() -{ - if (refcount) - NCNN_XADD(refcount, 1); -} - -inline void Mat::release() -{ - if (refcount && NCNN_XADD(refcount, -1) == 1) - { - if (allocator) - allocator->fastFree(data); - else - fastFree(data); - } - - data = 0; - - elemsize = 0; - elempack = 0; - - dims = 0; - w = 0; - h = 0; - c = 0; - - cstep = 0; - - refcount = 0; -} - inline bool Mat::empty() const { return data == 0 || total() == 0; @@ -1498,8 +1091,7 @@ inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkA inline VkMat::VkMat(const VkMat& m) : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c) { - if (refcount) - NCNN_XADD(refcount, 1); + addref(); cstep = m.cstep; } @@ -1545,239 +1137,6 @@ inline VkMat::~VkMat() release(); } -inline VkMat& VkMat::operator=(const VkMat& m) -{ - if (this == &m) - return *this; - - if (m.refcount) - NCNN_XADD(m.refcount, 1); - - release(); - - data = m.data; - refcount = m.refcount; - elemsize = m.elemsize; - elempack = m.elempack; - allocator = m.allocator; - - dims = m.dims; - w = m.w; - h = m.h; - c = m.c; - - cstep = m.cstep; - - return *this; -} - -inline void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator) -{ - if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 1; - w = _w; - h = 1; - c = 1; - - cstep = w; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - - data = 
allocator->fastMalloc(totalsize); - - refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); - *refcount = 1; - } -} - -inline void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) -{ - if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 2; - w = _w; - h = _h; - c = 1; - - cstep = w * h; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - - data = allocator->fastMalloc(totalsize); - - refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); - *refcount = 1; - } -} - -inline void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) -{ - if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 3; - w = _w; - h = _h; - c = _c; - - cstep = alignSize(w * h * elemsize, 16) / elemsize; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - - data = allocator->fastMalloc(totalsize); - - refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); - *refcount = 1; - } -} - -inline void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) -{ - if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 1; - w = _w; - h = 1; - c = 1; - - cstep = w; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - - data = allocator->fastMalloc(totalsize); - - refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); - *refcount = 1; - } -} - -inline void VkMat::create(int _w, int 
_h, size_t _elemsize, int _elempack, VkAllocator* _allocator) -{ - if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 2; - w = _w; - h = _h; - c = 1; - - cstep = w * h; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - - data = allocator->fastMalloc(totalsize); - - refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); - *refcount = 1; - } -} - -inline void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) -{ - if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 3; - w = _w; - h = _h; - c = _c; - - cstep = alignSize(w * h * elemsize, 16) / elemsize; - - if (total() > 0) - { - size_t totalsize = alignSize(total() * elemsize, 4); - - data = allocator->fastMalloc(totalsize); - - refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount)); - *refcount = 1; - } -} - -inline void VkMat::create_like(const Mat& m, VkAllocator* _allocator) -{ - int _dims = m.dims; - if (_dims == 1) - create(m.w, m.elemsize, m.elempack, _allocator); - if (_dims == 2) - create(m.w, m.h, m.elemsize, m.elempack, _allocator); - if (_dims == 3) - create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); -} - -inline void VkMat::create_like(const VkMat& m, VkAllocator* _allocator) -{ - int _dims = m.dims; - if (_dims == 1) - create(m.w, m.elemsize, m.elempack, _allocator); - if (_dims == 2) - create(m.w, m.h, m.elemsize, m.elempack, _allocator); - if (_dims == 3) - create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); -} - -inline void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator) -{ - int _dims = 
im.dims; - if (_dims == 1) - create(im.w, im.elemsize, im.elempack, _allocator); - if (_dims == 2) - create(im.w, im.h, im.elemsize, im.elempack, _allocator); - if (_dims == 3) - create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); -} - inline Mat VkMat::mapped() const { if (!allocator->mappable) @@ -1803,37 +1162,6 @@ inline void* VkMat::mapped_ptr() const return (unsigned char*)data->mapped_ptr + data->offset; } -inline void VkMat::addref() -{ - if (refcount) - NCNN_XADD(refcount, 1); -} - -inline void VkMat::release() -{ - if (refcount && NCNN_XADD(refcount, -1) == 1) - { - if (allocator && data) - { - allocator->fastFree(data); - } - } - - data = 0; - - elemsize = 0; - elempack = 0; - - dims = 0; - w = 0; - h = 0; - c = 0; - - cstep = 0; - - refcount = 0; -} - inline bool VkMat::empty() const { return data == 0 || total() == 0; @@ -1920,8 +1248,7 @@ inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _ele inline VkImageMat::VkImageMat(const VkImageMat& m) : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c) { - if (refcount) - NCNN_XADD(refcount, 1); + addref(); } inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) @@ -1959,225 +1286,6 @@ inline VkImageMat::~VkImageMat() release(); } -inline VkImageMat& VkImageMat::operator=(const VkImageMat& m) -{ - if (this == &m) - return *this; - - if (m.refcount) - NCNN_XADD(m.refcount, 1); - - release(); - - data = m.data; - refcount = m.refcount; - elemsize = m.elemsize; - elempack = m.elempack; - allocator = m.allocator; - - dims = m.dims; - w = m.w; - h = m.h; - c = m.c; - - return *this; -} - -inline void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator) -{ - if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - 
allocator = _allocator; - - dims = 1; - w = _w; - h = 1; - c = 1; - - if (total() > 0) - { - data = allocator->fastMalloc(w, h, c, elemsize, elempack); - if (!data) - return; - - refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); - *refcount = 1; - } -} - -inline void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) -{ - if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 2; - w = _w; - h = _h; - c = 1; - - if (total() > 0) - { - data = allocator->fastMalloc(w, h, c, elemsize, elempack); - if (!data) - return; - - refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); - *refcount = 1; - } -} - -inline void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) -{ - if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = 1; - allocator = _allocator; - - dims = 3; - w = _w; - h = _h; - c = _c; - - if (total() > 0) - { - data = allocator->fastMalloc(w, h, c, elemsize, elempack); - if (!data) - return; - - refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); - *refcount = 1; - } -} - -inline void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) -{ - if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 1; - w = _w; - h = 1; - c = 1; - - if (total() > 0) - { - data = allocator->fastMalloc(w, h, c, elemsize, elempack); - if (!data) - return; - - refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); - *refcount = 1; - } -} - -inline void 
VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) -{ - if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 2; - w = _w; - h = _h; - c = 1; - - if (total() > 0) - { - data = allocator->fastMalloc(w, h, c, elemsize, elempack); - if (!data) - return; - - refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); - *refcount = 1; - } -} - -inline void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) -{ - if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator) - return; - - release(); - - elemsize = _elemsize; - elempack = _elempack; - allocator = _allocator; - - dims = 3; - w = _w; - h = _h; - c = _c; - - if (total() > 0) - { - data = allocator->fastMalloc(w, h, c, elemsize, elempack); - if (!data) - return; - - refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount)); - *refcount = 1; - } -} - -inline void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator) -{ - int _dims = m.dims; - if (_dims == 1) - create(m.w, m.elemsize, m.elempack, _allocator); - if (_dims == 2) - create(m.w, m.h, m.elemsize, m.elempack, _allocator); - if (_dims == 3) - create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); -} - -inline void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator) -{ - int _dims = m.dims; - if (_dims == 1) - create(m.w, m.elemsize, m.elempack, _allocator); - if (_dims == 2) - create(m.w, m.h, m.elemsize, m.elempack, _allocator); - if (_dims == 3) - create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); -} - -inline void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator) -{ - int _dims = im.dims; - if (_dims == 1) - create(im.w, im.elemsize, 
im.elempack, _allocator); - if (_dims == 2) - create(im.w, im.h, im.elemsize, im.elempack, _allocator); - if (_dims == 3) - create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); -} - inline Mat VkImageMat::mapped() const { if (!allocator->mappable || !data->mapped_ptr) @@ -2203,35 +1311,6 @@ inline void* VkImageMat::mapped_ptr() const return (unsigned char*)data->mapped_ptr + data->bind_offset; } -inline void VkImageMat::addref() -{ - if (refcount) - NCNN_XADD(refcount, 1); -} - -inline void VkImageMat::release() -{ - if (refcount && NCNN_XADD(refcount, -1) == 1) - { - if (allocator && data) - { - allocator->fastFree(data); - } - } - - data = 0; - - elemsize = 0; - elempack = 0; - - dims = 0; - w = 0; - h = 0; - c = 0; - - refcount = 0; -} - inline bool VkImageMat::empty() const { return data == 0 || total() == 0;