You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

test_packing.cpp 12 kB

adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
X86 Elempack 8 AVX implementations. (#1853) * added avx implementations of FC and Max pool * Specify AVX2 * Small fixes and using Fused avx activations * fix type casting * fixing some CI errors * Fix code format * fix pooling test * remove vector typedef * More compile fixes * remove vector typedef * set c++ version to 17 * Force c++ 17 * Fixing mathfun * Try and workaround typedef issues * typefix * Remove typedef * switch to static inline * attempting to fix msvc bug * Verified MSVX FIX * Fixing clang build * commit before switch * More avx and packing implementation * Fix ctest * starting the depthwise pack 8 implementation * Unrolled loop * add depthwise pack 8 implementations * Working 1x1 pack 8 implementation added * revert incorrect changes * added conact elempack 8 * more elempack enabled layers added and started on the conversion of the winograd pack4 conv 3x3 * Added code formatting * fix styling * Unroll loops * unrolling loops * Added more elempac layers for mobilenet v3 * revert commit * fix code style * remove arm neon references * remove pack4 references * More cleanup * added packing avx code * fixing linux build ctests * remove usage of aligned loads * More aligned mem ops removed * Cleanup, revert some files and remove not working winograd and shufflechannel implementation * add stackoverflow referal * Fix windows build * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * implement requested chaanges * remove reshape * revert arm file change * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * fix unterminated directive Co-authored-by: Restyled.io <commits@restyled.io>
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "testutil.h"
  15. static int packing_cpu_naive(const ncnn::Mat& a, ncnn::Mat& b, int out_elempack)
  16. {
  17. ncnn::ParamDict pd;
  18. pd.set(0, out_elempack);
  19. std::vector<ncnn::Mat> weights(0);
  20. ncnn::Option opt;
  21. opt.num_threads = 1;
  22. ncnn::Layer* op = ncnn::create_layer_naive("Packing");
  23. op->load_param(pd);
  24. ncnn::ModelBinFromMatArray mb(weights.data());
  25. op->load_model(mb);
  26. op->create_pipeline(opt);
  27. op->forward(a, b, opt);
  28. op->destroy_pipeline(opt);
  29. delete op;
  30. return 0;
  31. }
  32. static int test_packing_cpu_fp32(const ncnn::Mat& a, int in_elempack, int out_elempack)
  33. {
  34. ncnn::ParamDict pd;
  35. pd.set(0, out_elempack);
  36. std::vector<ncnn::Mat> weights(0);
  37. ncnn::Option opt;
  38. opt.num_threads = 1;
  39. opt.use_vulkan_compute = false;
  40. opt.use_int8_inference = false;
  41. opt.use_fp16_storage = false;
  42. opt.use_fp16_arithmetic = false;
  43. opt.use_packing_layout = false;
  44. ncnn::Layer* op = ncnn::create_layer_cpu("Packing");
  45. op->load_param(pd);
  46. ncnn::ModelBinFromMatArray mb(weights.data());
  47. op->load_model(mb);
  48. op->create_pipeline(opt);
  49. ncnn::Mat ap;
  50. ncnn::convert_packing(a, ap, in_elempack, opt);
  51. ncnn::Mat b;
  52. packing_cpu_naive(ap, b, out_elempack);
  53. ncnn::Mat c;
  54. op->forward(ap, c, opt);
  55. op->destroy_pipeline(opt);
  56. delete op;
  57. if (CompareMat(b, c, 0.001) != 0)
  58. {
  59. fprintf(stderr, "test_packing_cpu_fp32 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  60. return -1;
  61. }
  62. return 0;
  63. }
  64. static int test_packing_cpu_fp16(const ncnn::Mat& a, int in_elempack, int out_elempack)
  65. {
  66. ncnn::ParamDict pd;
  67. pd.set(0, out_elempack);
  68. std::vector<ncnn::Mat> weights(0);
  69. ncnn::Option opt;
  70. opt.num_threads = 1;
  71. opt.use_vulkan_compute = false;
  72. opt.use_int8_inference = false;
  73. opt.use_fp16_storage = true;
  74. opt.use_fp16_arithmetic = true;
  75. opt.use_packing_layout = false;
  76. ncnn::Layer* op = ncnn::create_layer_cpu("Packing");
  77. if (!op->support_fp16_storage)
  78. {
  79. delete op;
  80. return 0;
  81. }
  82. op->load_param(pd);
  83. ncnn::ModelBinFromMatArray mb(weights.data());
  84. op->load_model(mb);
  85. op->create_pipeline(opt);
  86. ncnn::Mat a16;
  87. ncnn::cast_float32_to_float16(a, a16, opt);
  88. ncnn::Mat ap;
  89. ncnn::convert_packing(a16, ap, in_elempack, opt);
  90. ncnn::Mat b;
  91. packing_cpu_naive(ap, b, out_elempack);
  92. ncnn::Mat c;
  93. op->forward(ap, c, opt);
  94. op->destroy_pipeline(opt);
  95. delete op;
  96. ncnn::Mat c32;
  97. ncnn::cast_float16_to_float32(c, c32, opt);
  98. if (CompareMat(b, c32, 0.001) != 0)
  99. {
  100. fprintf(stderr, "test_packing_cpu_fp16 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  101. return -1;
  102. }
  103. return 0;
  104. }
  105. static int test_packing_cpu_int8(const ncnn::Mat& a, int in_elempack, int out_elempack)
  106. {
  107. ncnn::ParamDict pd;
  108. pd.set(0, out_elempack);
  109. std::vector<ncnn::Mat> weights(0);
  110. ncnn::Option opt;
  111. opt.num_threads = 1;
  112. opt.use_vulkan_compute = false;
  113. opt.use_int8_inference = false;
  114. opt.use_fp16_storage = false;
  115. opt.use_fp16_arithmetic = false;
  116. opt.use_packing_layout = false;
  117. ncnn::Layer* op = ncnn::create_layer_cpu("Packing");
  118. op->load_param(pd);
  119. ncnn::ModelBinFromMatArray mb(weights.data());
  120. op->load_model(mb);
  121. op->create_pipeline(opt);
  122. ncnn::Mat a8;
  123. if (a.dims == 1) a8 = RandomS8Mat(a.w);
  124. if (a.dims == 2) a8 = RandomS8Mat(a.w, a.h);
  125. if (a.dims == 3) a8 = RandomS8Mat(a.w, a.h, a.c);
  126. if (a.dims == 4) a8 = RandomS8Mat(a.w, a.h, a.d, a.c);
  127. ncnn::Mat ap;
  128. ncnn::convert_packing(a8, ap, in_elempack, opt);
  129. ncnn::Mat b;
  130. packing_cpu_naive(ap, b, out_elempack);
  131. ncnn::Mat c;
  132. op->forward(ap, c, opt);
  133. op->destroy_pipeline(opt);
  134. delete op;
  135. ncnn::Mat b32;
  136. ncnn::cast_int8_to_float32(b, b32, opt);
  137. ncnn::Mat c32;
  138. ncnn::cast_int8_to_float32(c, c32, opt);
  139. if (CompareMat(b32, c32, 0.001) != 0)
  140. {
  141. fprintf(stderr, "test_packing_cpu_int8 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  142. return -1;
  143. }
  144. return 0;
  145. }
  146. static int test_packing_cpu(const ncnn::Mat& a, int in_elempack, int out_elempack)
  147. {
  148. return 0
  149. || test_packing_cpu_fp32(a, in_elempack, out_elempack)
  150. || test_packing_cpu_fp16(a, in_elempack, out_elempack)
  151. || test_packing_cpu_int8(a, in_elempack, out_elempack);
  152. }
  153. #if NCNN_VULKAN
  154. static int test_packing_gpu_fp32(const ncnn::Mat& a, int in_elempack, int out_elempack)
  155. {
  156. ncnn::ParamDict pd;
  157. pd.set(0, out_elempack);
  158. pd.set(2, 1); // cast_type_from
  159. pd.set(3, 1); // cast_type_to
  160. std::vector<ncnn::Mat> weights(0);
  161. ncnn::Option opt;
  162. opt.num_threads = 1;
  163. opt.use_vulkan_compute = true;
  164. opt.use_int8_inference = false;
  165. opt.use_fp16_packed = false;
  166. opt.use_fp16_storage = false;
  167. opt.use_fp16_arithmetic = false;
  168. opt.use_int8_storage = false;
  169. opt.use_int8_arithmetic = false;
  170. opt.use_packing_layout = true;
  171. opt.use_shader_pack8 = true;
  172. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  173. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  174. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  175. opt.blob_vkallocator = blob_vkallocator;
  176. opt.workspace_vkallocator = blob_vkallocator;
  177. opt.staging_vkallocator = staging_vkallocator;
  178. if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
  179. if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
  180. ncnn::Layer* op = ncnn::create_layer_vulkan("Packing");
  181. op->vkdev = vkdev;
  182. op->load_param(pd);
  183. ncnn::ModelBinFromMatArray mb(weights.data());
  184. op->load_model(mb);
  185. op->create_pipeline(opt);
  186. ncnn::Mat ap;
  187. ncnn::convert_packing(a, ap, in_elempack, opt);
  188. ncnn::Mat b;
  189. packing_cpu_naive(ap, b, out_elempack);
  190. ncnn::Mat d;
  191. // forward
  192. ncnn::VkCompute cmd(vkdev);
  193. // upload
  194. ncnn::VkMat a_gpu;
  195. cmd.record_clone(ap, a_gpu, opt);
  196. ncnn::VkMat d_gpu;
  197. op->forward(a_gpu, d_gpu, cmd, opt);
  198. // download
  199. cmd.record_clone(d_gpu, d, opt);
  200. cmd.submit_and_wait();
  201. op->destroy_pipeline(opt);
  202. delete op;
  203. vkdev->reclaim_blob_allocator(blob_vkallocator);
  204. vkdev->reclaim_staging_allocator(staging_vkallocator);
  205. if (CompareMat(b, d, 0.001) != 0)
  206. {
  207. fprintf(stderr, "test_packing_gpu failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  208. return -1;
  209. }
  210. return 0;
  211. }
  212. static int test_packing_gpu_int8(const ncnn::Mat& a, int in_elempack, int out_elempack)
  213. {
  214. ncnn::ParamDict pd;
  215. pd.set(0, out_elempack);
  216. pd.set(2, 4); // cast_type_from
  217. pd.set(3, 4); // cast_type_to
  218. std::vector<ncnn::Mat> weights(0);
  219. ncnn::Option opt;
  220. opt.num_threads = 1;
  221. opt.use_vulkan_compute = true;
  222. opt.use_int8_inference = false;
  223. opt.use_fp16_packed = false;
  224. opt.use_fp16_storage = false;
  225. opt.use_fp16_arithmetic = false;
  226. opt.use_int8_storage = false;
  227. opt.use_int8_arithmetic = false;
  228. opt.use_packing_layout = true;
  229. opt.use_shader_pack8 = true;
  230. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  231. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  232. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  233. opt.blob_vkallocator = blob_vkallocator;
  234. opt.workspace_vkallocator = blob_vkallocator;
  235. opt.staging_vkallocator = staging_vkallocator;
  236. if (!vkdev->info.support_int8_packed()) opt.use_int8_packed = false;
  237. if (!vkdev->info.support_int8_storage()) opt.use_int8_storage = false;
  238. ncnn::Layer* op = ncnn::create_layer_vulkan("Packing");
  239. op->vkdev = vkdev;
  240. op->load_param(pd);
  241. ncnn::ModelBinFromMatArray mb(weights.data());
  242. op->load_model(mb);
  243. op->create_pipeline(opt);
  244. ncnn::Mat a8;
  245. if (a.dims == 1) a8 = RandomS8Mat(a.w);
  246. if (a.dims == 2) a8 = RandomS8Mat(a.w, a.h);
  247. if (a.dims == 3) a8 = RandomS8Mat(a.w, a.h, a.c);
  248. if (a.dims == 4) a8 = RandomS8Mat(a.w, a.h, a.d, a.c);
  249. ncnn::Mat ap;
  250. ncnn::convert_packing(a8, ap, in_elempack, opt);
  251. ncnn::Mat b;
  252. packing_cpu_naive(ap, b, out_elempack);
  253. ncnn::Mat c;
  254. // forward
  255. ncnn::VkCompute cmd(vkdev);
  256. // upload
  257. ncnn::VkMat a_gpu;
  258. cmd.record_clone(ap, a_gpu, opt);
  259. ncnn::VkMat c_gpu;
  260. op->forward(a_gpu, c_gpu, cmd, opt);
  261. // download
  262. cmd.record_clone(c_gpu, c, opt);
  263. cmd.submit_and_wait();
  264. op->destroy_pipeline(opt);
  265. delete op;
  266. ncnn::Mat b32;
  267. ncnn::cast_int8_to_float32(b, b32, opt);
  268. ncnn::Mat c32;
  269. ncnn::cast_int8_to_float32(c, c32, opt);
  270. if (CompareMat(b32, c32, 0.001) != 0)
  271. {
  272. fprintf(stderr, "test_packing_gpu_int8 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  273. return -1;
  274. }
  275. return 0;
  276. }
  277. static int test_packing_gpu(const ncnn::Mat& a, int in_elempack, int out_elempack)
  278. {
  279. return 0
  280. || test_packing_gpu_fp32(a, in_elempack, out_elempack)
  281. || test_packing_gpu_int8(a, in_elempack, out_elempack);
  282. }
  283. #endif
  284. static int test_packing_cpu(const ncnn::Mat& a)
  285. {
  286. return 0
  287. || test_packing_cpu(a, 1, 1)
  288. || test_packing_cpu(a, 4, 4)
  289. || test_packing_cpu(a, 4, 8)
  290. || test_packing_cpu(a, 1, 4)
  291. || test_packing_cpu(a, 4, 1)
  292. || test_packing_cpu(a, 1, 8)
  293. || test_packing_cpu(a, 8, 1)
  294. || test_packing_cpu(a, 4, 8)
  295. || test_packing_cpu(a, 8, 4)
  296. || test_packing_cpu(a, 1, 16)
  297. || test_packing_cpu(a, 16, 1)
  298. || test_packing_cpu(a, 4, 16)
  299. || test_packing_cpu(a, 16, 4)
  300. || test_packing_cpu(a, 8, 16)
  301. || test_packing_cpu(a, 16, 8);
  302. }
  303. #if NCNN_VULKAN
  304. static int test_packing_gpu(const ncnn::Mat& a)
  305. {
  306. return 0
  307. || test_packing_gpu(a, 1, 1)
  308. || test_packing_gpu(a, 4, 4)
  309. || test_packing_gpu(a, 8, 8)
  310. || test_packing_gpu(a, 1, 4)
  311. || test_packing_gpu(a, 4, 1)
  312. || test_packing_gpu(a, 1, 8)
  313. || test_packing_gpu(a, 8, 1)
  314. || test_packing_gpu(a, 4, 8)
  315. || test_packing_gpu(a, 8, 4);
  316. }
  317. #endif // NCNN_VULKAN
  318. static int test_packing_0()
  319. {
  320. ncnn::Mat a = RandomMat(9, 7, 10, 16);
  321. ncnn::Mat b = RandomMat(9, 7, 10, 3);
  322. return 0
  323. || test_packing_cpu(a)
  324. || test_packing_cpu(b)
  325. #if NCNN_VULKAN
  326. || test_packing_gpu(a)
  327. #endif
  328. ;
  329. }
  330. static int test_packing_1()
  331. {
  332. ncnn::Mat a = RandomMat(9, 10, 16);
  333. ncnn::Mat b = RandomMat(9, 10, 3);
  334. return 0
  335. || test_packing_cpu(a)
  336. || test_packing_cpu(b)
  337. #if NCNN_VULKAN
  338. || test_packing_gpu(a)
  339. #endif
  340. ;
  341. }
  342. static int test_packing_2()
  343. {
  344. ncnn::Mat a = RandomMat(19, 16);
  345. return 0
  346. || test_packing_cpu(a)
  347. #if NCNN_VULKAN
  348. || test_packing_gpu(a)
  349. #endif
  350. ;
  351. }
  352. static int test_packing_3()
  353. {
  354. ncnn::Mat a = RandomMat(80);
  355. return 0
  356. || test_packing_cpu(a)
  357. #if NCNN_VULKAN
  358. || test_packing_gpu(a)
  359. #endif
  360. ;
  361. }
  362. int main()
  363. {
  364. SRAND(7767517);
  365. return 0
  366. || test_packing_0()
  367. || test_packing_1()
  368. || test_packing_2()
  369. || test_packing_3();
  370. }