You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

test_packing.cpp 17 kB

adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
X86 Elempack 8 AVX implementations. (#1853) * added avx implementations of FC and Max pool * Specify AVX2 * Small fixes and using Fused avx activations * fix type casting * fixing some CI errors * Fix code format * fix pooling test * remove vector typedef * More compile fixes * remove vector typedef * set c++ version to 17 * Force c++ 17 * Fixing mathfun * Try and workaround typedef issues * typefix * Remove typedef * switch to static inline * attempting to fix msvc bug * Verified MSVX FIX * Fixing clang build * commit before switch * More avx and packing implementation * Fix ctest * starting the depthwise pack 8 implementation * Unrolled loop * add depthwise pack 8 implementations * Working 1x1 pack 8 implementation added * revert incorrect changes * added conact elempack 8 * more elempack enabled layers added and started on the conversion of the winograd pack4 conv 3x3 * Added code formatting * fix styling * Unroll loops * unrolling loops * Added more elempac layers for mobilenet v3 * revert commit * fix code style * remove arm neon references * remove pack4 references * More cleanup * added packing avx code * fixing linux build ctests * remove usage of aligned loads * More aligned mem ops removed * Cleanup, revert some files and remove not working winograd and shufflechannel implementation * add stackoverflow referal * Fix windows build * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * implement requested chaanges * remove reshape * revert arm file change * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * fix unterminated directive Co-authored-by: Restyled.io <commits@restyled.io>
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
adreno image shader + fp16 + fp16a (#1714) * wip * wip * fix * image and imageview can not be destroyed until command execution ends * fast copy path for tightly packed data * wip * texture load works * 1d 3d image * record clone image, multiple commands share one image reference * upload download image * layer forward accept vkimagemat * vkimagemat graph works * staging vkimagemat for passing dynamic parameters, macro for fp32+image shader, padding image shader * vkimagemat elemsize * convolution test pass * conv1x1s1 image shader * fast staging image allocator from host memory, pooling image shader * convolutiondepthwise image shader * innerproduct image shader * packing image shader * crop deconvolution image shader * resolve spirv binding types * image fp16 and fp16a, cast image shader * eltwise image shader * wip * absval image shader * deconvolutiondepthwise image shader * concat image shader, squeezenet works * noop split image shader * uniform precision hint * layer support_image_storage * wip * vulkan device utility operator * command is storage and packing option aware * fallback to cpu on image allocation failed, mobilenetssd works * flatten image shader, enable more test * ci test * check imgfp32 imgfp16 imgfp16a features * fix ci test * fix ci test * upgrade swiftshader * wip * opt aggressive * imgfp16p * opt none * convolution winograd image shader * fix flush range, fast copy path for continous buffer * minor fix * fix innerproduct * wip ... * wip * cast fix * packing test * wip * image fp16p is fp16p * wip * silence * more line info * code clean * softmax image shader
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "testutil.h"
  15. static int packing_cpu_naive(const ncnn::Mat& a, ncnn::Mat& b, int out_elempack)
  16. {
  17. ncnn::ParamDict pd;
  18. pd.set(0, out_elempack);
  19. std::vector<ncnn::Mat> weights(0);
  20. ncnn::Option opt;
  21. opt.num_threads = 1;
  22. ncnn::Layer* op = ncnn::create_layer_naive("Packing");
  23. op->load_param(pd);
  24. ncnn::ModelBinFromMatArray mb(weights.data());
  25. op->load_model(mb);
  26. op->create_pipeline(opt);
  27. op->forward(a, b, opt);
  28. op->destroy_pipeline(opt);
  29. delete op;
  30. return 0;
  31. }
  32. static int test_packing_cpu_fp32(const ncnn::Mat& a, int in_elempack, int out_elempack)
  33. {
  34. ncnn::ParamDict pd;
  35. pd.set(0, out_elempack);
  36. std::vector<ncnn::Mat> weights(0);
  37. ncnn::Option opt;
  38. opt.num_threads = 1;
  39. opt.use_vulkan_compute = false;
  40. opt.use_int8_inference = false;
  41. opt.use_fp16_storage = false;
  42. opt.use_fp16_arithmetic = false;
  43. opt.use_packing_layout = false;
  44. ncnn::Layer* op = ncnn::create_layer_cpu("Packing");
  45. op->load_param(pd);
  46. ncnn::ModelBinFromMatArray mb(weights.data());
  47. op->load_model(mb);
  48. op->create_pipeline(opt);
  49. ncnn::Mat ap;
  50. ncnn::convert_packing(a, ap, in_elempack, opt);
  51. ncnn::Mat b;
  52. packing_cpu_naive(ap, b, out_elempack);
  53. ncnn::Mat c;
  54. op->forward(ap, c, opt);
  55. op->destroy_pipeline(opt);
  56. delete op;
  57. if (CompareMat(b, c, 0.001) != 0)
  58. {
  59. fprintf(stderr, "test_packing_cpu_fp32 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  60. return -1;
  61. }
  62. return 0;
  63. }
  64. static int test_packing_cpu_fp16(const ncnn::Mat& a, int in_elempack, int out_elempack)
  65. {
  66. ncnn::ParamDict pd;
  67. pd.set(0, out_elempack);
  68. std::vector<ncnn::Mat> weights(0);
  69. ncnn::Option opt;
  70. opt.num_threads = 1;
  71. opt.use_vulkan_compute = false;
  72. opt.use_int8_inference = false;
  73. opt.use_fp16_storage = true;
  74. opt.use_fp16_arithmetic = true;
  75. opt.use_packing_layout = false;
  76. ncnn::Layer* op = ncnn::create_layer_cpu("Packing");
  77. if (!op->support_fp16_storage)
  78. {
  79. delete op;
  80. return 0;
  81. }
  82. op->load_param(pd);
  83. ncnn::ModelBinFromMatArray mb(weights.data());
  84. op->load_model(mb);
  85. op->create_pipeline(opt);
  86. ncnn::Mat a16;
  87. ncnn::cast_float32_to_float16(a, a16, opt);
  88. ncnn::Mat ap;
  89. ncnn::convert_packing(a16, ap, in_elempack, opt);
  90. ncnn::Mat b;
  91. packing_cpu_naive(ap, b, out_elempack);
  92. ncnn::Mat c;
  93. op->forward(ap, c, opt);
  94. op->destroy_pipeline(opt);
  95. delete op;
  96. ncnn::Mat c32;
  97. ncnn::cast_float16_to_float32(c, c32, opt);
  98. if (CompareMat(b, c32, 0.001) != 0)
  99. {
  100. fprintf(stderr, "test_packing_cpu_fp16 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  101. return -1;
  102. }
  103. return 0;
  104. }
  105. static int test_packing_cpu_int8(const ncnn::Mat& a, int in_elempack, int out_elempack)
  106. {
  107. ncnn::ParamDict pd;
  108. pd.set(0, out_elempack);
  109. std::vector<ncnn::Mat> weights(0);
  110. ncnn::Option opt;
  111. opt.num_threads = 1;
  112. opt.use_vulkan_compute = false;
  113. opt.use_int8_inference = false;
  114. opt.use_fp16_storage = false;
  115. opt.use_fp16_arithmetic = false;
  116. opt.use_packing_layout = false;
  117. ncnn::Layer* op = ncnn::create_layer_cpu("Packing");
  118. op->load_param(pd);
  119. ncnn::ModelBinFromMatArray mb(weights.data());
  120. op->load_model(mb);
  121. op->create_pipeline(opt);
  122. ncnn::Mat a8;
  123. if (a.dims == 1) a8 = RandomS8Mat(a.w);
  124. if (a.dims == 2) a8 = RandomS8Mat(a.w, a.h);
  125. if (a.dims == 3) a8 = RandomS8Mat(a.w, a.h, a.c);
  126. if (a.dims == 4) a8 = RandomS8Mat(a.w, a.h, a.d, a.c);
  127. ncnn::Mat ap;
  128. ncnn::convert_packing(a8, ap, in_elempack, opt);
  129. ncnn::Mat b;
  130. packing_cpu_naive(ap, b, out_elempack);
  131. ncnn::Mat c;
  132. op->forward(ap, c, opt);
  133. op->destroy_pipeline(opt);
  134. delete op;
  135. ncnn::Mat b32;
  136. ncnn::cast_int8_to_float32(b, b32, opt);
  137. ncnn::Mat c32;
  138. ncnn::cast_int8_to_float32(c, c32, opt);
  139. if (CompareMat(b32, c32, 0.001) != 0)
  140. {
  141. fprintf(stderr, "test_packing_cpu_int8 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  142. return -1;
  143. }
  144. return 0;
  145. }
  146. static int test_packing_cpu(const ncnn::Mat& a, int in_elempack, int out_elempack)
  147. {
  148. return 0
  149. || test_packing_cpu_fp32(a, in_elempack, out_elempack)
  150. || test_packing_cpu_fp16(a, in_elempack, out_elempack)
  151. || test_packing_cpu_int8(a, in_elempack, out_elempack);
  152. }
  153. #if NCNN_VULKAN
  154. static int test_packing_gpu_buffer(const ncnn::Mat& a, int in_elempack, int out_elempack)
  155. {
  156. ncnn::ParamDict pd;
  157. pd.set(0, out_elempack);
  158. pd.set(2, 1); // cast_type_from
  159. pd.set(3, 1); // cast_type_to
  160. pd.set(4, 0); // storage_type_from
  161. pd.set(5, 0); // storage_type_to
  162. std::vector<ncnn::Mat> weights(0);
  163. ncnn::Option opt;
  164. opt.num_threads = 1;
  165. opt.use_vulkan_compute = true;
  166. opt.use_int8_inference = false;
  167. opt.use_fp16_packed = false;
  168. opt.use_fp16_storage = false;
  169. opt.use_fp16_arithmetic = false;
  170. opt.use_int8_storage = false;
  171. opt.use_int8_arithmetic = false;
  172. opt.use_packing_layout = true;
  173. opt.use_shader_pack8 = true;
  174. opt.use_image_storage = false;
  175. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  176. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  177. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  178. opt.blob_vkallocator = blob_vkallocator;
  179. opt.workspace_vkallocator = blob_vkallocator;
  180. opt.staging_vkallocator = staging_vkallocator;
  181. if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
  182. if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
  183. ncnn::Layer* op = ncnn::create_layer_vulkan("Packing");
  184. op->vkdev = vkdev;
  185. op->load_param(pd);
  186. ncnn::ModelBinFromMatArray mb(weights.data());
  187. op->load_model(mb);
  188. op->create_pipeline(opt);
  189. ncnn::Mat ap;
  190. ncnn::convert_packing(a, ap, in_elempack, opt);
  191. ncnn::Mat b;
  192. packing_cpu_naive(ap, b, out_elempack);
  193. ncnn::Mat d;
  194. // forward
  195. ncnn::VkCompute cmd(vkdev);
  196. // upload
  197. ncnn::VkMat a_gpu;
  198. cmd.record_clone(ap, a_gpu, opt);
  199. ncnn::VkMat d_gpu;
  200. op->forward(a_gpu, d_gpu, cmd, opt);
  201. // download
  202. cmd.record_clone(d_gpu, d, opt);
  203. cmd.submit_and_wait();
  204. op->destroy_pipeline(opt);
  205. delete op;
  206. vkdev->reclaim_blob_allocator(blob_vkallocator);
  207. vkdev->reclaim_staging_allocator(staging_vkallocator);
  208. if (CompareMat(b, d, 0.001) != 0)
  209. {
  210. fprintf(stderr, "test_packing_gpu_buffer failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  211. return -1;
  212. }
  213. return 0;
  214. }
  215. static int test_packing_gpu_image(const ncnn::Mat& a, int in_elempack, int out_elempack)
  216. {
  217. ncnn::ParamDict pd;
  218. pd.set(0, out_elempack);
  219. pd.set(2, 1); // cast_type_from
  220. pd.set(3, 1); // cast_type_to
  221. pd.set(4, 1); // storage_type_from
  222. pd.set(5, 1); // storage_type_to
  223. std::vector<ncnn::Mat> weights(0);
  224. ncnn::Option opt;
  225. opt.num_threads = 1;
  226. opt.use_vulkan_compute = true;
  227. opt.use_int8_inference = false;
  228. opt.use_fp16_packed = false;
  229. opt.use_fp16_storage = false;
  230. opt.use_fp16_arithmetic = false;
  231. opt.use_int8_storage = false;
  232. opt.use_int8_arithmetic = false;
  233. opt.use_packing_layout = true;
  234. opt.use_shader_pack8 = true;
  235. opt.use_image_storage = true;
  236. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  237. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  238. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  239. opt.blob_vkallocator = blob_vkallocator;
  240. opt.workspace_vkallocator = blob_vkallocator;
  241. opt.staging_vkallocator = staging_vkallocator;
  242. if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
  243. if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
  244. ncnn::Layer* op = ncnn::create_layer_vulkan("Packing");
  245. op->vkdev = vkdev;
  246. op->load_param(pd);
  247. ncnn::ModelBinFromMatArray mb(weights.data());
  248. op->load_model(mb);
  249. op->create_pipeline(opt);
  250. ncnn::Mat ap;
  251. ncnn::convert_packing(a, ap, in_elempack, opt);
  252. ncnn::Mat b;
  253. packing_cpu_naive(ap, b, out_elempack);
  254. ncnn::Mat d;
  255. // forward
  256. ncnn::VkCompute cmd(vkdev);
  257. // upload
  258. ncnn::VkImageMat a_gpu;
  259. cmd.record_clone(ap, a_gpu, opt);
  260. ncnn::VkImageMat d_gpu;
  261. op->forward(a_gpu, d_gpu, cmd, opt);
  262. // download
  263. cmd.record_clone(d_gpu, d, opt);
  264. cmd.submit_and_wait();
  265. op->destroy_pipeline(opt);
  266. delete op;
  267. vkdev->reclaim_blob_allocator(blob_vkallocator);
  268. vkdev->reclaim_staging_allocator(staging_vkallocator);
  269. if (CompareMat(b, d, 0.001) != 0)
  270. {
  271. fprintf(stderr, "test_packing_gpu_image failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  272. return -1;
  273. }
  274. return 0;
  275. }
  276. static int test_packing_gpu_buffer2image(const ncnn::Mat& a, int in_elempack, int out_elempack)
  277. {
  278. ncnn::Option opt;
  279. opt.num_threads = 1;
  280. opt.use_vulkan_compute = true;
  281. opt.use_int8_inference = false;
  282. opt.use_fp16_packed = false;
  283. opt.use_fp16_storage = false;
  284. opt.use_fp16_arithmetic = false;
  285. opt.use_int8_storage = false;
  286. opt.use_int8_arithmetic = false;
  287. opt.use_packing_layout = true;
  288. opt.use_shader_pack8 = true;
  289. opt.use_image_storage = true;
  290. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  291. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  292. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  293. opt.blob_vkallocator = blob_vkallocator;
  294. opt.workspace_vkallocator = blob_vkallocator;
  295. opt.staging_vkallocator = staging_vkallocator;
  296. if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
  297. if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
  298. ncnn::Mat ap;
  299. ncnn::convert_packing(a, ap, in_elempack, opt);
  300. ncnn::Mat b;
  301. packing_cpu_naive(ap, b, out_elempack);
  302. ncnn::Mat d;
  303. // forward
  304. ncnn::VkCompute cmd(vkdev);
  305. // upload
  306. ncnn::VkMat a_gpu;
  307. cmd.record_clone(ap, a_gpu, opt);
  308. ncnn::VkImageMat d_gpu;
  309. vkdev->convert_packing(a_gpu, d_gpu, out_elempack, cmd, opt);
  310. // download
  311. cmd.record_clone(d_gpu, d, opt);
  312. cmd.submit_and_wait();
  313. vkdev->reclaim_blob_allocator(blob_vkallocator);
  314. vkdev->reclaim_staging_allocator(staging_vkallocator);
  315. if (CompareMat(b, d, 0.001) != 0)
  316. {
  317. fprintf(stderr, "test_packing_gpu_buffer2image failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  318. return -1;
  319. }
  320. return 0;
  321. }
  322. static int test_packing_gpu_image2buffer(const ncnn::Mat& a, int in_elempack, int out_elempack)
  323. {
  324. ncnn::Option opt;
  325. opt.num_threads = 1;
  326. opt.use_vulkan_compute = true;
  327. opt.use_int8_inference = false;
  328. opt.use_fp16_packed = false;
  329. opt.use_fp16_storage = false;
  330. opt.use_fp16_arithmetic = false;
  331. opt.use_int8_storage = false;
  332. opt.use_int8_arithmetic = false;
  333. opt.use_packing_layout = true;
  334. opt.use_shader_pack8 = true;
  335. opt.use_image_storage = true;
  336. ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
  337. ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
  338. ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
  339. opt.blob_vkallocator = blob_vkallocator;
  340. opt.workspace_vkallocator = blob_vkallocator;
  341. opt.staging_vkallocator = staging_vkallocator;
  342. if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
  343. if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;
  344. ncnn::Mat ap;
  345. ncnn::convert_packing(a, ap, in_elempack, opt);
  346. ncnn::Mat b;
  347. packing_cpu_naive(ap, b, out_elempack);
  348. ncnn::Mat d;
  349. // forward
  350. ncnn::VkCompute cmd(vkdev);
  351. // upload
  352. ncnn::VkImageMat a_gpu;
  353. cmd.record_clone(ap, a_gpu, opt);
  354. ncnn::VkMat d_gpu;
  355. vkdev->convert_packing(a_gpu, d_gpu, out_elempack, cmd, opt);
  356. // download
  357. cmd.record_clone(d_gpu, d, opt);
  358. cmd.submit_and_wait();
  359. vkdev->reclaim_blob_allocator(blob_vkallocator);
  360. vkdev->reclaim_staging_allocator(staging_vkallocator);
  361. if (CompareMat(b, d, 0.001) != 0)
  362. {
  363. fprintf(stderr, "test_packing_gpu_image2buffer failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
  364. return -1;
  365. }
  366. return 0;
  367. }
  368. #endif
  369. static int test_packing_cpu(const ncnn::Mat& a)
  370. {
  371. return 0
  372. || test_packing_cpu(a, 1, 1)
  373. || test_packing_cpu(a, 4, 4)
  374. || test_packing_cpu(a, 4, 8)
  375. || test_packing_cpu(a, 1, 4)
  376. || test_packing_cpu(a, 4, 1)
  377. || test_packing_cpu(a, 1, 8)
  378. || test_packing_cpu(a, 8, 1)
  379. || test_packing_cpu(a, 4, 8)
  380. || test_packing_cpu(a, 8, 4)
  381. || test_packing_cpu(a, 1, 16)
  382. || test_packing_cpu(a, 16, 1)
  383. || test_packing_cpu(a, 4, 16)
  384. || test_packing_cpu(a, 16, 4)
  385. || test_packing_cpu(a, 8, 16)
  386. || test_packing_cpu(a, 16, 8);
  387. }
  388. #if NCNN_VULKAN
  389. static int test_packing_gpu(const ncnn::Mat& a)
  390. {
  391. return 0
  392. || test_packing_gpu_buffer(a, 1, 1)
  393. || test_packing_gpu_buffer(a, 4, 4)
  394. || test_packing_gpu_buffer(a, 8, 8)
  395. || test_packing_gpu_buffer(a, 1, 4)
  396. || test_packing_gpu_buffer(a, 4, 1)
  397. || test_packing_gpu_buffer(a, 1, 8)
  398. || test_packing_gpu_buffer(a, 8, 1)
  399. || test_packing_gpu_buffer(a, 4, 8)
  400. || test_packing_gpu_buffer(a, 8, 4)
  401. || test_packing_gpu_image(a, 1, 1)
  402. || test_packing_gpu_image(a, 4, 4)
  403. || test_packing_gpu_image(a, 8, 8)
  404. || test_packing_gpu_image(a, 1, 4)
  405. || test_packing_gpu_image(a, 4, 1)
  406. || test_packing_gpu_image(a, 1, 8)
  407. || test_packing_gpu_image(a, 8, 1)
  408. || test_packing_gpu_image(a, 4, 8)
  409. || test_packing_gpu_image(a, 8, 4)
  410. || test_packing_gpu_buffer2image(a, 1, 1)
  411. || test_packing_gpu_buffer2image(a, 4, 4)
  412. || test_packing_gpu_buffer2image(a, 8, 8)
  413. || test_packing_gpu_buffer2image(a, 1, 4)
  414. || test_packing_gpu_buffer2image(a, 4, 1)
  415. || test_packing_gpu_buffer2image(a, 1, 8)
  416. || test_packing_gpu_buffer2image(a, 8, 1)
  417. || test_packing_gpu_buffer2image(a, 4, 8)
  418. || test_packing_gpu_buffer2image(a, 8, 4)
  419. || test_packing_gpu_image2buffer(a, 1, 1)
  420. || test_packing_gpu_image2buffer(a, 4, 4)
  421. || test_packing_gpu_image2buffer(a, 8, 8)
  422. || test_packing_gpu_image2buffer(a, 1, 4)
  423. || test_packing_gpu_image2buffer(a, 4, 1)
  424. || test_packing_gpu_image2buffer(a, 1, 8)
  425. || test_packing_gpu_image2buffer(a, 8, 1)
  426. || test_packing_gpu_image2buffer(a, 4, 8)
  427. || test_packing_gpu_image2buffer(a, 8, 4);
  428. }
  429. #endif // NCNN_VULKAN
  430. static int test_packing_0()
  431. {
  432. ncnn::Mat a = RandomMat(9, 7, 10, 16);
  433. ncnn::Mat b = RandomMat(9, 7, 10, 3);
  434. return 0
  435. || test_packing_cpu(a)
  436. || test_packing_cpu(b)
  437. #if NCNN_VULKAN
  438. || test_packing_gpu(a)
  439. #endif
  440. ;
  441. }
  442. static int test_packing_1()
  443. {
  444. ncnn::Mat a = RandomMat(9, 10, 16);
  445. ncnn::Mat b = RandomMat(9, 10, 3);
  446. return 0
  447. || test_packing_cpu(a)
  448. || test_packing_cpu(b)
  449. #if NCNN_VULKAN
  450. || test_packing_gpu(a)
  451. #endif
  452. ;
  453. }
  454. static int test_packing_2()
  455. {
  456. ncnn::Mat a = RandomMat(19, 16);
  457. return 0
  458. || test_packing_cpu(a)
  459. #if NCNN_VULKAN
  460. || test_packing_gpu(a)
  461. #endif
  462. ;
  463. }
  464. static int test_packing_3()
  465. {
  466. ncnn::Mat a = RandomMat(80);
  467. return 0
  468. || test_packing_cpu(a)
  469. #if NCNN_VULKAN
  470. || test_packing_gpu(a)
  471. #endif
  472. ;
  473. }
  474. int main()
  475. {
  476. SRAND(7767517);
  477. return 0
  478. || test_packing_0()
  479. || test_packing_1()
  480. || test_packing_2()
  481. || test_packing_3();
  482. }