Browse Source

Fix illegal instruction on Raspberry Pi 4 when NCNN_ARM82 is enabled

The compiler may emit inline member functions as out-of-line (non-inlined) copies in translation units built for different architectures, and the linker may keep the copy built for the newer architecture, which results in illegal instructions on older hardware.
tags/20210507
nihui GitHub 5 years ago
parent
commit
d7cbc055f3
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 923 additions and 924 deletions
  1. +920
    -0
      src/mat.cpp
  2. +3
    -924
      src/mat.h

+ 920
- 0
src/mat.cpp View File

@@ -33,6 +33,926 @@

namespace ncnn {

// Shallow, reference-counted assignment: afterwards *this shares m's buffer.
Mat& Mat::operator=(const Mat& m)
{
    if (this != &m)
    {
        // Take the reference on m's buffer before dropping our own, so a
        // buffer shared by both objects is never freed by release().
        if (m.refcount)
            NCNN_XADD(m.refcount, 1);

        release();

        // shape
        dims = m.dims;
        w = m.w;
        h = m.h;
        c = m.c;
        cstep = m.cstep;

        // storage
        data = m.data;
        refcount = m.refcount;
        elemsize = m.elemsize;
        elempack = m.elempack;
        allocator = m.allocator;
    }

    return *this;
}

// Deep copy into storage obtained from _allocator (default allocation when
// null). Returns an empty Mat when the source is empty or when no destination
// buffer could be created.
Mat Mat::clone(Allocator* _allocator) const
{
    if (empty())
        return Mat();

    Mat m;
    if (dims == 1)
        m.create(w, elemsize, elempack, _allocator);
    else if (dims == 2)
        m.create(w, h, elemsize, elempack, _allocator);
    else if (dims == 3)
        m.create(w, h, c, elemsize, elempack, _allocator);

    // nothing was allocated (unsupported dims or failed allocation)
    if (m.empty())
        return m;

    if (cstep == m.cstep)
    {
        // identical layout: one bulk copy
        memcpy(m.data, data, total() * elemsize);
    }
    else
    {
        // Channel strides differ, so total() of the source does not describe
        // the destination buffer; copy each channel's w*h payload separately.
        const size_t size = (size_t)w * h * elemsize;
        for (int i = 0; i < c; i++)
        {
            const void* ptr = (unsigned char*)data + i * cstep * elemsize;
            void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
            memcpy(mptr, ptr, size);
        }
    }

    return m;
}

// Replace the contents of *this with a deep copy of mat.
void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator)
{
    const Mat copied = mat.clone(allocator);
    *this = copied;
}

// View or copy this Mat as a 1-D blob of _w elements.
// Returns an empty Mat when w*h*c != _w. A 3-D Mat whose channel stride is
// not exactly w*h is copied into new storage; otherwise the buffer is shared.
Mat Mat::reshape(int _w, Allocator* _allocator) const
{
    if (w * h * c != _w)
        return Mat();

    if (dims == 3 && cstep != (size_t)w * h)
    {
        Mat m;
        m.create(_w, elemsize, elempack, _allocator);

        // no destination buffer: do not memcpy into it
        if (m.empty())
            return m;

        // flatten: pack the channels back to back, dropping stride padding
        for (int i = 0; i < c; i++)
        {
            const void* ptr = (unsigned char*)data + i * cstep * elemsize;
            void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize;
            memcpy(mptr, ptr, (size_t)w * h * elemsize);
        }

        return m;
    }

    // contiguous already: share the buffer and only relabel the shape
    Mat m = *this;

    m.dims = 1;
    m.w = _w;
    m.h = 1;
    m.c = 1;

    m.cstep = _w;

    return m;
}

// View or copy this Mat as a 2-D blob of _w x _h elements.
// Returns an empty Mat when w*h*c != _w*_h. A 3-D Mat whose channel stride is
// not exactly w*h is copied into new storage; otherwise the buffer is shared.
Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const
{
    if (w * h * c != _w * _h)
        return Mat();

    if (dims == 3 && cstep != (size_t)w * h)
    {
        Mat m;
        m.create(_w, _h, elemsize, elempack, _allocator);

        // no destination buffer: do not memcpy into it
        if (m.empty())
            return m;

        // flatten: pack the channels back to back, dropping stride padding
        for (int i = 0; i < c; i++)
        {
            const void* ptr = (unsigned char*)data + i * cstep * elemsize;
            void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize;
            memcpy(mptr, ptr, (size_t)w * h * elemsize);
        }

        return m;
    }

    // contiguous already: share the buffer and only relabel the shape
    Mat m = *this;

    m.dims = 2;
    m.w = _w;
    m.h = _h;
    m.c = 1;

    m.cstep = (size_t)_w * _h;

    return m;
}

// View or copy this Mat as a 3-D blob of _w x _h x _c elements.
// Returns an empty Mat when the element counts differ. Data is copied when
// the 16-byte aligned channel stride of the target does not match the
// current packing; otherwise the buffer is shared.
Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const
{
    if (w * h * c != _w * _h * _c)
        return Mat();

    if (dims < 3)
    {
        if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize)
        {
            Mat m;
            m.create(_w, _h, _c, elemsize, elempack, _allocator);

            // no destination buffer: do not memcpy into it
            if (m.empty())
                return m;

            // align channel: spread the packed source over padded channels
            for (int i = 0; i < _c; i++)
            {
                const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize;
                void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
                memcpy(mptr, ptr, (size_t)_w * _h * elemsize);
            }

            return m;
        }
    }
    else if (c != _c)
    {
        // flatten and then align
        Mat tmp = reshape(_w * _h * _c, _allocator);
        return tmp.reshape(_w, _h, _c, _allocator);
    }

    // layout already matches: share the buffer and only relabel the shape
    Mat m = *this;

    m.dims = 3;
    m.w = _w;
    m.h = _h;
    m.c = _c;

    m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize;

    return m;
}

// Allocate a 1-D blob of _w elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
void Mat::create(int _w, size_t _elemsize, Allocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 2-D blob of _w x _h elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 3-D blob of _w x _h x _c elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter is stored right after the 4-byte aligned payload.
void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 1-D blob of _w elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 2-D blob of _w x _h elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 3-D blob of _w x _h x _c elements with the given packing.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter is stored right after the 4-byte aligned payload.
void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate this Mat with the shape, element size and packing of m.
// Any dims value other than 1, 2 or 3 leaves this Mat untouched.
void Mat::create_like(const Mat& m, Allocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

#if NCNN_VULKAN
// Allocate this Mat with the shape, element size and packing of the VkMat m.
// Any dims value other than 1, 2 or 3 leaves this Mat untouched.
void Mat::create_like(const VkMat& m, Allocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this Mat with the shape, element size and packing of the
// VkImageMat im. Any dims value other than 1, 2 or 3 leaves it untouched.
void Mat::create_like(const VkImageMat& im, Allocator* _allocator)
{
    switch (im.dims)
    {
    case 1:
        create(im.w, im.elemsize, im.elempack, _allocator);
        break;
    case 2:
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
        break;
    case 3:
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
        break;
    default:
        break;
    }
}
#endif // NCNN_VULKAN

// Take one more reference on the shared buffer, if there is a counter.
void Mat::addref()
{
    if (!refcount)
        return;

    NCNN_XADD(refcount, 1);
}

// Drop this object's reference and reset it to the empty state.
// The buffer is freed when NCNN_XADD reports the previous count was 1.
void Mat::release()
{
    const bool last_owner = refcount && NCNN_XADD(refcount, -1) == 1;
    if (last_owner)
    {
        if (allocator)
            allocator->fastFree(data);
        else
            fastFree(data);
    }

    // storage
    data = 0;
    refcount = 0;
    elemsize = 0;
    elempack = 0;

    // shape
    dims = 0;
    w = 0;
    h = 0;
    c = 0;
    cstep = 0;
}

#if NCNN_VULKAN
// Shallow, reference-counted assignment: afterwards *this shares m's buffer.
VkMat& VkMat::operator=(const VkMat& m)
{
    if (this != &m)
    {
        // Take the reference on m's buffer before dropping our own, so a
        // buffer shared by both objects is never freed by release().
        if (m.refcount)
            NCNN_XADD(m.refcount, 1);

        release();

        // shape
        dims = m.dims;
        w = m.w;
        h = m.h;
        c = m.c;
        cstep = m.cstep;

        // storage
        data = m.data;
        refcount = m.refcount;
        elemsize = m.elemsize;
        elempack = m.elempack;
        allocator = m.allocator;
    }

    return *this;
}

// Allocate a 1-D buffer of _w elements with elempack 1 from _allocator.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty, as VkImageMat::create does
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 2-D buffer of _w x _h elements with elempack 1 from _allocator.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    // widen before multiplying so w * h cannot overflow int
    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty, as VkImageMat::create does
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 3-D buffer of _w x _h x _c elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter lives inside the returned VkBufferMemory.
void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    // widen before multiplying so w * h cannot overflow int
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty, as VkImageMat::create does
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 1-D buffer of _w elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty, as VkImageMat::create does
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 2-D buffer of _w x _h elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    // widen before multiplying so w * h cannot overflow int
    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty, as VkImageMat::create does
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 3-D buffer of _w x _h x _c elements with the given packing.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter lives inside the returned VkBufferMemory.
void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    // widen before multiplying so w * h cannot overflow int
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty, as VkImageMat::create does
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate this VkMat with the shape, element size and packing of the Mat m.
// Any dims value other than 1, 2 or 3 leaves this VkMat untouched.
void VkMat::create_like(const Mat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this VkMat with the shape, element size and packing of m.
// Any dims value other than 1, 2 or 3 leaves this VkMat untouched.
void VkMat::create_like(const VkMat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this VkMat with the shape, element size and packing of the
// VkImageMat im. Any dims value other than 1, 2 or 3 leaves it untouched.
void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
{
    switch (im.dims)
    {
    case 1:
        create(im.w, im.elemsize, im.elempack, _allocator);
        break;
    case 2:
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
        break;
    case 3:
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Take one more reference on the shared buffer, if there is a counter.
void VkMat::addref()
{
    if (!refcount)
        return;

    NCNN_XADD(refcount, 1);
}

// Drop this object's reference and reset it to the empty state.
// The buffer is returned to the allocator when NCNN_XADD reports the
// previous count was 1 and both allocator and data are non-null.
void VkMat::release()
{
    const bool last_owner = refcount && NCNN_XADD(refcount, -1) == 1;
    if (last_owner && allocator && data)
        allocator->fastFree(data);

    // storage
    data = 0;
    refcount = 0;
    elemsize = 0;
    elempack = 0;

    // shape
    dims = 0;
    w = 0;
    h = 0;
    c = 0;
    cstep = 0;
}

// Shallow, reference-counted assignment: afterwards *this shares m's image.
VkImageMat& VkImageMat::operator=(const VkImageMat& m)
{
    if (this != &m)
    {
        // Take the reference on m's image before dropping our own, so an
        // image shared by both objects is never freed by release().
        if (m.refcount)
            NCNN_XADD(m.refcount, 1);

        release();

        // shape
        dims = m.dims;
        w = m.w;
        h = m.h;
        c = m.c;

        // storage
        data = m.data;
        refcount = m.refcount;
        elemsize = m.elemsize;
        elempack = m.elempack;
        allocator = m.allocator;
    }

    return *this;
}

// Allocate a 1-D image of _w elements with elempack 1 from _allocator.
// No-op when shape, element size, packing and allocator already match.
// Stays without data/refcount when the allocator returns null.
void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
{
    const bool unchanged = dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    if (total() == 0)
        return;

    data = allocator->fastMalloc(w, h, c, elemsize, elempack);
    if (data)
    {
        // the counter lives inside the VkImageMemory record
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 2-D image of _w x _h elements with elempack 1 from _allocator.
// No-op when shape, element size, packing and allocator already match.
// Stays without data/refcount when the allocator returns null.
void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
{
    const bool unchanged = dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    if (total() == 0)
        return;

    data = allocator->fastMalloc(w, h, c, elemsize, elempack);
    if (data)
    {
        // the counter lives inside the VkImageMemory record
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 3-D image of _w x _h x _c elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// Stays without data/refcount when the allocator returns null.
void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
{
    const bool unchanged = dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    if (total() == 0)
        return;

    data = allocator->fastMalloc(w, h, c, elemsize, elempack);
    if (data)
    {
        // the counter lives inside the VkImageMemory record
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 1-D image of _w elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// Stays without data/refcount when the allocator returns null.
void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    const bool unchanged = dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    if (total() == 0)
        return;

    data = allocator->fastMalloc(w, h, c, elemsize, elempack);
    if (data)
    {
        // the counter lives inside the VkImageMemory record
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 2-D image of _w x _h elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// Stays without data/refcount when the allocator returns null.
void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    const bool unchanged = dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    if (total() == 0)
        return;

    data = allocator->fastMalloc(w, h, c, elemsize, elempack);
    if (data)
    {
        // the counter lives inside the VkImageMemory record
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 3-D image of _w x _h x _c elements with the given packing.
// No-op when shape, element size, packing and allocator already match.
// Stays without data/refcount when the allocator returns null.
void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    const bool unchanged = dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    if (total() == 0)
        return;

    data = allocator->fastMalloc(w, h, c, elemsize, elempack);
    if (data)
    {
        // the counter lives inside the VkImageMemory record
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
        *refcount = 1;
    }
}

// Allocate this VkImageMat with the shape, element size and packing of the
// Mat m. Any dims value other than 1, 2 or 3 leaves it untouched.
void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this VkImageMat with the shape, element size and packing of the
// VkMat m. Any dims value other than 1, 2 or 3 leaves it untouched.
void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this VkImageMat with the shape, element size and packing of im.
// Any dims value other than 1, 2 or 3 leaves it untouched.
void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
{
    switch (im.dims)
    {
    case 1:
        create(im.w, im.elemsize, im.elempack, _allocator);
        break;
    case 2:
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
        break;
    case 3:
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Take one more reference on the shared image, if there is a counter.
void VkImageMat::addref()
{
    if (!refcount)
        return;

    NCNN_XADD(refcount, 1);
}

// Drop this object's reference and reset it to the empty state.
// The image is returned to the allocator when NCNN_XADD reports the
// previous count was 1 and both allocator and data are non-null.
void VkImageMat::release()
{
    const bool last_owner = refcount && NCNN_XADD(refcount, -1) == 1;
    if (last_owner && allocator && data)
        allocator->fastFree(data);

    // storage
    data = 0;
    refcount = 0;
    elemsize = 0;
    elempack = 0;

    // shape
    dims = 0;
    w = 0;
    h = 0;
    c = 0;
}
#endif // NCNN_VULKAN

void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals)
{
Layer* op;


+ 3
- 924
src/mat.h View File

@@ -707,8 +707,7 @@ inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocat
// Copy constructor: shares m's buffer and takes exactly one reference on it.
inline Mat::Mat(const Mat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c), cstep(m.cstep)
{
    // addref() already increments the counter when it is non-null; a second
    // manual NCNN_XADD here would count this copy twice and leak the buffer.
    addref();
}

inline Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator)
@@ -752,32 +751,6 @@ inline Mat::~Mat()
release();
}

// Shallow, reference-counted assignment: afterwards *this shares m's buffer.
inline Mat& Mat::operator=(const Mat& m)
{
    if (this != &m)
    {
        // Take the reference on m's buffer before dropping our own, so a
        // buffer shared by both objects is never freed by release().
        if (m.refcount)
            NCNN_XADD(m.refcount, 1);

        release();

        // shape
        dims = m.dims;
        w = m.w;
        h = m.h;
        c = m.c;
        cstep = m.cstep;

        // storage
        data = m.data;
        refcount = m.refcount;
        elemsize = m.elemsize;
        elempack = m.elempack;
        allocator = m.allocator;
    }

    return *this;
}

inline void Mat::fill(float _v)
{
int size = (int)total();
@@ -961,386 +934,6 @@ inline void Mat::fill(T _v)
}
}

// Deep copy into storage obtained from _allocator (default allocation when
// null). Returns an empty Mat when the source is empty or when no destination
// buffer could be created.
inline Mat Mat::clone(Allocator* _allocator) const
{
    if (empty())
        return Mat();

    Mat m;
    if (dims == 1)
        m.create(w, elemsize, elempack, _allocator);
    else if (dims == 2)
        m.create(w, h, elemsize, elempack, _allocator);
    else if (dims == 3)
        m.create(w, h, c, elemsize, elempack, _allocator);

    // nothing was allocated (unsupported dims or failed allocation)
    if (m.empty())
        return m;

    if (cstep == m.cstep)
    {
        // identical layout: one bulk copy
        memcpy(m.data, data, total() * elemsize);
    }
    else
    {
        // Channel strides differ, so total() of the source does not describe
        // the destination buffer; copy each channel's w*h payload separately.
        const size_t size = (size_t)w * h * elemsize;
        for (int i = 0; i < c; i++)
        {
            const void* ptr = (unsigned char*)data + i * cstep * elemsize;
            void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
            memcpy(mptr, ptr, size);
        }
    }

    return m;
}

// Replace the contents of *this with a deep copy of mat.
inline void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator)
{
    const Mat copied = mat.clone(allocator);
    *this = copied;
}

// View or copy this Mat as a 1-D blob of _w elements.
// Returns an empty Mat when w*h*c != _w. A 3-D Mat whose channel stride is
// not exactly w*h is copied into new storage; otherwise the buffer is shared.
inline Mat Mat::reshape(int _w, Allocator* _allocator) const
{
    if (w * h * c != _w)
        return Mat();

    if (dims == 3 && cstep != (size_t)w * h)
    {
        Mat m;
        m.create(_w, elemsize, elempack, _allocator);

        // no destination buffer: do not memcpy into it
        if (m.empty())
            return m;

        // flatten: pack the channels back to back, dropping stride padding
        for (int i = 0; i < c; i++)
        {
            const void* ptr = (unsigned char*)data + i * cstep * elemsize;
            void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize;
            memcpy(mptr, ptr, (size_t)w * h * elemsize);
        }

        return m;
    }

    // contiguous already: share the buffer and only relabel the shape
    Mat m = *this;

    m.dims = 1;
    m.w = _w;
    m.h = 1;
    m.c = 1;

    m.cstep = _w;

    return m;
}

// View or copy this Mat as a 2-D blob of _w x _h elements.
// Returns an empty Mat when w*h*c != _w*_h. A 3-D Mat whose channel stride is
// not exactly w*h is copied into new storage; otherwise the buffer is shared.
inline Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const
{
    if (w * h * c != _w * _h)
        return Mat();

    if (dims == 3 && cstep != (size_t)w * h)
    {
        Mat m;
        m.create(_w, _h, elemsize, elempack, _allocator);

        // no destination buffer: do not memcpy into it
        if (m.empty())
            return m;

        // flatten: pack the channels back to back, dropping stride padding
        for (int i = 0; i < c; i++)
        {
            const void* ptr = (unsigned char*)data + i * cstep * elemsize;
            void* mptr = (unsigned char*)m.data + (size_t)i * w * h * elemsize;
            memcpy(mptr, ptr, (size_t)w * h * elemsize);
        }

        return m;
    }

    // contiguous already: share the buffer and only relabel the shape
    Mat m = *this;

    m.dims = 2;
    m.w = _w;
    m.h = _h;
    m.c = 1;

    m.cstep = (size_t)_w * _h;

    return m;
}

// View or copy this Mat as a 3-D blob of _w x _h x _c elements.
// Returns an empty Mat when the element counts differ. Data is copied when
// the 16-byte aligned channel stride of the target does not match the
// current packing; otherwise the buffer is shared.
inline Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const
{
    if (w * h * c != _w * _h * _c)
        return Mat();

    if (dims < 3)
    {
        if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize)
        {
            Mat m;
            m.create(_w, _h, _c, elemsize, elempack, _allocator);

            // no destination buffer: do not memcpy into it
            if (m.empty())
                return m;

            // align channel: spread the packed source over padded channels
            for (int i = 0; i < _c; i++)
            {
                const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize;
                void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
                memcpy(mptr, ptr, (size_t)_w * _h * elemsize);
            }

            return m;
        }
    }
    else if (c != _c)
    {
        // flatten and then align
        Mat tmp = reshape(_w * _h * _c, _allocator);
        return tmp.reshape(_w, _h, _c, _allocator);
    }

    // layout already matches: share the buffer and only relabel the shape
    Mat m = *this;

    m.dims = 3;
    m.w = _w;
    m.h = _h;
    m.c = _c;

    m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize;

    return m;
}

// Allocate a 1-D blob of _w elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
inline void Mat::create(int _w, size_t _elemsize, Allocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 2-D blob of _w x _h elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
inline void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 3-D blob of _w x _h x _c elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter is stored right after the 4-byte aligned payload.
inline void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 1-D blob of _w elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
inline void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 2-D blob of _w x _h elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter is stored right after the 4-byte aligned payload.
inline void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate a 3-D blob of _w x _h x _c elements with the given packing.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter is stored right after the 4-byte aligned payload.
inline void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);
        if (allocator)
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
        else
            data = fastMalloc(totalsize + (int)sizeof(*refcount));

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)(((unsigned char*)data) + totalsize);
        *refcount = 1;
    }
}

// Allocate this Mat with the shape, element size and packing of m.
// Any dims value other than 1, 2 or 3 leaves this Mat untouched.
inline void Mat::create_like(const Mat& m, Allocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

#if NCNN_VULKAN
// Allocate this Mat with the shape, element size and packing of the VkMat m.
// Any dims value other than 1, 2 or 3 leaves this Mat untouched.
inline void Mat::create_like(const VkMat& m, Allocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this Mat with the shape, element size and packing of the
// VkImageMat im. Any dims value other than 1, 2 or 3 leaves it untouched.
inline void Mat::create_like(const VkImageMat& im, Allocator* _allocator)
{
    switch (im.dims)
    {
    case 1:
        create(im.w, im.elemsize, im.elempack, _allocator);
        break;
    case 2:
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
        break;
    case 3:
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
        break;
    default:
        break;
    }
}
#endif // NCNN_VULKAN

// Take one more reference on the shared buffer, if there is a counter.
inline void Mat::addref()
{
    if (!refcount)
        return;

    NCNN_XADD(refcount, 1);
}

// Drop this object's reference and reset it to the empty state.
// The buffer is freed when NCNN_XADD reports the previous count was 1.
inline void Mat::release()
{
    const bool last_owner = refcount && NCNN_XADD(refcount, -1) == 1;
    if (last_owner)
    {
        if (allocator)
            allocator->fastFree(data);
        else
            fastFree(data);
    }

    // storage
    data = 0;
    refcount = 0;
    elemsize = 0;
    elempack = 0;

    // shape
    dims = 0;
    w = 0;
    h = 0;
    c = 0;
    cstep = 0;
}

inline bool Mat::empty() const
{
return data == 0 || total() == 0;
@@ -1498,8 +1091,7 @@ inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkA
// Copy constructor: shares m's buffer and takes exactly one reference on it.
inline VkMat::VkMat(const VkMat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c)
{
    // addref() already increments the counter when it is non-null; a second
    // manual NCNN_XADD here would count this copy twice and leak the buffer.
    addref();

    cstep = m.cstep;
}
@@ -1545,239 +1137,6 @@ inline VkMat::~VkMat()
release();
}

// Shallow, reference-counted assignment: afterwards *this shares m's buffer.
inline VkMat& VkMat::operator=(const VkMat& m)
{
    if (this != &m)
    {
        // Take the reference on m's buffer before dropping our own, so a
        // buffer shared by both objects is never freed by release().
        if (m.refcount)
            NCNN_XADD(m.refcount, 1);

        release();

        // shape
        dims = m.dims;
        w = m.w;
        h = m.h;
        c = m.c;
        cstep = m.cstep;

        // storage
        data = m.data;
        refcount = m.refcount;
        elemsize = m.elemsize;
        elempack = m.elempack;
        allocator = m.allocator;
    }

    return *this;
}

// Allocate a 1-D buffer of _w elements with elempack 1 from _allocator.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
inline void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 2-D buffer of _w x _h elements with elempack 1 from _allocator.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
inline void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    // widen before multiplying so w * h cannot overflow int
    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 3-D buffer of _w x _h x _c elements with elempack 1.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter lives inside the returned VkBufferMemory.
inline void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    // widen before multiplying so w * h cannot overflow int
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 1-D buffer of _w elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
inline void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    cstep = w;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 2-D buffer of _w x _h elements with the given element packing.
// No-op when shape, element size, packing and allocator already match.
// The reference counter lives inside the returned VkBufferMemory.
inline void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    // widen before multiplying so w * h cannot overflow int
    cstep = (size_t)w * h;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate a 3-D buffer of _w x _h x _c elements with the given packing.
// No-op when shape, element size, packing and allocator already match.
// Each channel is padded so that its byte size is a multiple of 16; the
// reference counter lives inside the returned VkBufferMemory.
inline void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
        return;

    release();

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    // widen before multiplying so w * h cannot overflow int
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;

    if (total() > 0)
    {
        size_t totalsize = alignSize(total() * elemsize, 4);

        data = allocator->fastMalloc(totalsize);

        // allocation failed: stay empty instead of writing the counter
        // through a pointer derived from null
        if (!data)
            return;

        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
        *refcount = 1;
    }
}

// Allocate this VkMat with the shape, element size and packing of the Mat m.
// Any dims value other than 1, 2 or 3 leaves this VkMat untouched.
inline void VkMat::create_like(const Mat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this VkMat with the shape, element size and packing of m.
// Any dims value other than 1, 2 or 3 leaves this VkMat untouched.
inline void VkMat::create_like(const VkMat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this VkMat with the shape, element size and packing of the
// VkImageMat im. Any dims value other than 1, 2 or 3 leaves it untouched.
inline void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
{
    switch (im.dims)
    {
    case 1:
        create(im.w, im.elemsize, im.elempack, _allocator);
        break;
    case 2:
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
        break;
    case 3:
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
        break;
    default:
        break;
    }
}

inline Mat VkMat::mapped() const
{
if (!allocator->mappable)
@@ -1803,37 +1162,6 @@ inline void* VkMat::mapped_ptr() const
return (unsigned char*)data->mapped_ptr + data->offset;
}

// Take one more reference on the shared buffer, if there is a counter.
inline void VkMat::addref()
{
    if (!refcount)
        return;

    NCNN_XADD(refcount, 1);
}

// Drop this object's reference and reset it to the empty state.
// The buffer is returned to the allocator when NCNN_XADD reports the
// previous count was 1 and both allocator and data are non-null.
inline void VkMat::release()
{
    const bool last_owner = refcount && NCNN_XADD(refcount, -1) == 1;
    if (last_owner && allocator && data)
        allocator->fastFree(data);

    // storage
    data = 0;
    refcount = 0;
    elemsize = 0;
    elempack = 0;

    // shape
    dims = 0;
    w = 0;
    h = 0;
    c = 0;
    cstep = 0;
}

inline bool VkMat::empty() const
{
return data == 0 || total() == 0;
@@ -1920,8 +1248,7 @@ inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _ele
// Copy constructor: shares m's image memory (no pixel copy) and takes exactly
// one additional reference on it.
inline VkImageMat::VkImageMat(const VkImageMat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c)
{
    // One increment per new owner. Bumping the counter both inline and through
    // addref() would leave a surplus reference, so the count could never drop
    // to the value at which release() frees the memory.
    addref();
}

inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
@@ -1959,225 +1286,6 @@ inline VkImageMat::~VkImageMat()
release();
}

// Copy assignment: release whatever this object holds and share m's image
// memory instead. Self-assignment is a no-op.
inline VkImageMat& VkImageMat::operator=(const VkImageMat& m)
{
    if (this != &m)
    {
        // Reference the source before releasing our own storage, so the
        // increment always happens ahead of any decrement done by release().
        if (m.refcount)
            NCNN_XADD(m.refcount, 1);

        release();

        // storage and format
        data = m.data;
        refcount = m.refcount;
        allocator = m.allocator;
        elemsize = m.elemsize;
        elempack = m.elempack;

        // shape
        dims = m.dims;
        w = m.w;
        h = m.h;
        c = m.c;
    }

    return *this;
}

// Allocate a 1-D image of width _w with elempack 1 from _allocator.
// Returns without touching anything when the current layout and allocator
// already match the request.
inline void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
{
    const bool unchanged = dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    if (total() > 0)
    {
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
        if (data)
        {
            // the reference counter is the refcount field of the VkImageMemory record
            unsigned char* base = (unsigned char*)data;
            refcount = (int*)(base + offsetof(VkImageMemory, refcount));
            *refcount = 1;
        }
    }
}

// Allocate a 2-D image of _w x _h with elempack 1 from _allocator.
// Returns without touching anything when the current layout and allocator
// already match the request.
inline void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
{
    const bool unchanged = dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    if (total() > 0)
    {
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
        if (data)
        {
            // the reference counter is the refcount field of the VkImageMemory record
            unsigned char* base = (unsigned char*)data;
            refcount = (int*)(base + offsetof(VkImageMemory, refcount));
            *refcount = 1;
        }
    }
}

// Allocate a 3-D image of _w x _h x _c with elempack 1 from _allocator.
// Returns without touching anything when the current layout and allocator
// already match the request.
inline void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
{
    const bool unchanged = dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    elemsize = _elemsize;
    elempack = 1;
    allocator = _allocator;

    if (total() > 0)
    {
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
        if (data)
        {
            // the reference counter is the refcount field of the VkImageMemory record
            unsigned char* base = (unsigned char*)data;
            refcount = (int*)(base + offsetof(VkImageMemory, refcount));
            *refcount = 1;
        }
    }
}

// Allocate a 1-D image of width _w with the given element size and packing
// from _allocator. Returns without touching anything when the current layout
// and allocator already match the request.
inline void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    const bool unchanged = dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 1;
    w = _w;
    h = 1;
    c = 1;

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    if (total() > 0)
    {
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
        if (data)
        {
            // the reference counter is the refcount field of the VkImageMemory record
            unsigned char* base = (unsigned char*)data;
            refcount = (int*)(base + offsetof(VkImageMemory, refcount));
            *refcount = 1;
        }
    }
}

// Allocate a 2-D image of _w x _h with the given element size and packing
// from _allocator. Returns without touching anything when the current layout
// and allocator already match the request.
inline void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    const bool unchanged = dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 2;
    w = _w;
    h = _h;
    c = 1;

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    if (total() > 0)
    {
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
        if (data)
        {
            // the reference counter is the refcount field of the VkImageMemory record
            unsigned char* base = (unsigned char*)data;
            refcount = (int*)(base + offsetof(VkImageMemory, refcount));
            *refcount = 1;
        }
    }
}

// Allocate a 3-D image of _w x _h x _c with the given element size and packing
// from _allocator. Returns without touching anything when the current layout
// and allocator already match the request.
inline void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
{
    const bool unchanged = dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator;
    if (unchanged)
        return;

    release();

    dims = 3;
    w = _w;
    h = _h;
    c = _c;

    elemsize = _elemsize;
    elempack = _elempack;
    allocator = _allocator;

    if (total() > 0)
    {
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
        if (data)
        {
            // the reference counter is the refcount field of the VkImageMemory record
            unsigned char* base = (unsigned char*)data;
            refcount = (int*)(base + offsetof(VkImageMemory, refcount));
            *refcount = 1;
        }
    }
}

// Allocate this image mat with the same dims / shape / elemsize / elempack as
// the host Mat m, using _allocator. A source whose dims is not 1, 2 or 3
// leaves this object untouched.
inline void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this image mat with the same dims / shape / elemsize / elempack as
// the buffer mat m, using _allocator. A source whose dims is not 1, 2 or 3
// leaves this object untouched.
inline void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator)
{
    switch (m.dims)
    {
    case 1:
        create(m.w, m.elemsize, m.elempack, _allocator);
        break;
    case 2:
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
        break;
    case 3:
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
        break;
    default:
        break;
    }
}

// Allocate this image mat with the same dims / shape / elemsize / elempack as
// the image mat im, using _allocator. A source whose dims is not 1, 2 or 3
// leaves this object untouched.
inline void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
{
    switch (im.dims)
    {
    case 1:
        create(im.w, im.elemsize, im.elempack, _allocator);
        break;
    case 2:
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
        break;
    case 3:
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
        break;
    default:
        break;
    }
}

inline Mat VkImageMat::mapped() const
{
if (!allocator->mappable || !data->mapped_ptr)
@@ -2203,35 +1311,6 @@ inline void* VkImageMat::mapped_ptr() const
return (unsigned char*)data->mapped_ptr + data->bind_offset;
}

// Take one more reference on the shared image memory; a no-op when there is
// no reference counter attached.
inline void VkImageMat::addref()
{
    if (!refcount)
        return;

    NCNN_XADD(refcount, 1);
}

// Drop this object's reference and reset it to the empty state.
// The image memory is handed back to the allocator only when
// NCNN_XADD(refcount, -1) reports 1 and both allocator and data are set.
inline void VkImageMat::release()
{
    const bool last_owner = refcount && NCNN_XADD(refcount, -1) == 1;
    if (last_owner && allocator && data)
        allocator->fastFree(data);

    // detach from the storage
    data = 0;
    refcount = 0;

    // clear element format
    elemsize = 0;
    elempack = 0;

    // clear shape
    dims = 0;
    w = 0;
    h = 0;
    c = 0;
}

inline bool VkImageMat::empty() const
{
return data == 0 || total() == 0;


Loading…
Cancel
Save