| @@ -25,6 +25,7 @@ PixelShuffle::PixelShuffle() | |||
| int PixelShuffle::load_param(const ParamDict& pd) /* Reads this layer's settings from pd into upscale_factor and mode; always returns 0. */ | |||
| { | |||
| upscale_factor = pd.get(0, 1); /* param id 0; the second argument is presumably the fallback used when the id is absent - confirm against ParamDict::get */ | |||
| mode = pd.get(1, 0); /* param id 1; forward() reads source channel p*r*r + sh*r + sw when mode is 0 and (sh*r + sw)*outc + p otherwise, with r = upscale_factor */ | |||
| return 0; | |||
| } | |||
| @@ -53,7 +54,13 @@ int PixelShuffle::forward(const Mat& bottom_blob, Mat& top_blob, const Option& o | |||
| { | |||
| for (int sw = 0; sw < upscale_factor; sw++) | |||
| { | |||
| const float* sptr = bottom_blob.channel(p * upscale_factor * upscale_factor + sh * upscale_factor + sw); | |||
| int q; | |||
| if (mode == 0) | |||
| q = p * upscale_factor * upscale_factor + sh * upscale_factor + sw; | |||
| else // if (mode == 1) | |||
| q = (sh * upscale_factor + sw) * outc + p; | |||
| const float* sptr = bottom_blob.channel(q); | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| @@ -30,6 +30,7 @@ public: | |||
| public: | |||
| int upscale_factor; | |||
| int mode; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,7 +24,7 @@ Reorg::Reorg() | |||
| int Reorg::load_param(const ParamDict& pd) | |||
| { | |||
| stride = pd.get(0, 0); | |||
| stride = pd.get(0, 1); | |||
| mode = pd.get(1, 0); | |||
| return 0; | |||
| @@ -82,18 +82,19 @@ int PixelShuffle_vulkan::create_pipeline(const Option& _opt) | |||
| opt.use_image_storage = false; | |||
| } | |||
| std::vector<vk_specialization_type> specializations(1 + 10); | |||
| std::vector<vk_specialization_type> specializations(2 + 10); | |||
| specializations[0].i = upscale_factor; | |||
| specializations[1 + 0].i = shape_packed.dims; | |||
| specializations[1 + 1].i = shape_packed.w; | |||
| specializations[1 + 2].i = shape_packed.h; | |||
| specializations[1 + 3].i = shape_packed.c; | |||
| specializations[1 + 4].i = shape_packed.cstep; | |||
| specializations[1 + 5].i = out_shape_packed.dims; | |||
| specializations[1 + 6].i = out_shape_packed.w; | |||
| specializations[1 + 7].i = out_shape_packed.h; | |||
| specializations[1 + 8].i = out_shape_packed.c; | |||
| specializations[1 + 9].i = out_shape_packed.cstep; | |||
| specializations[1].i = mode; | |||
| specializations[2 + 0].i = shape_packed.dims; | |||
| specializations[2 + 1].i = shape_packed.w; | |||
| specializations[2 + 2].i = shape_packed.h; | |||
| specializations[2 + 3].i = shape_packed.c; | |||
| specializations[2 + 4].i = shape_packed.cstep; | |||
| specializations[2 + 5].i = out_shape_packed.dims; | |||
| specializations[2 + 6].i = out_shape_packed.w; | |||
| specializations[2 + 7].i = out_shape_packed.h; | |||
| specializations[2 + 8].i = out_shape_packed.c; | |||
| specializations[2 + 9].i = out_shape_packed.cstep; | |||
| Mat local_size_xyz_bottom; // pack4to1 and pack8to1 | |||
| if (shape_packed.dims != 3) | |||
| @@ -22,8 +22,9 @@ | |||
| #endif | |||
| layout (constant_id = 0) const int upscale_factor = 0; | |||
| layout (constant_id = 1) const int mode = 0; | |||
| #define shape_constant_id_offset 1 | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| @@ -70,7 +71,15 @@ void main() | |||
| int x = gx / upscale_factor; | |||
| int y = gy / upscale_factor; | |||
| int z = gz * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| int z; | |||
| if (mode == 0) | |||
| { | |||
| z = gz * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| } | |||
| else // if (mode == 1) | |||
| { | |||
| z = ((gy % upscale_factor) * upscale_factor + gx % upscale_factor) * psc(outc) + gz; | |||
| } | |||
| #if NCNN_image_shader | |||
| image3d_cp1(top_blob, ivec3(gx, gy, gz), bottom_blob, ivec3(x, y, z)); | |||
| @@ -22,8 +22,9 @@ | |||
| #endif | |||
| layout (constant_id = 0) const int upscale_factor = 0; | |||
| layout (constant_id = 1) const int mode = 0; | |||
| #define shape_constant_id_offset 1 | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| @@ -76,7 +77,15 @@ void main() | |||
| int x = gx / upscale_factor; | |||
| int y = gy / upscale_factor; | |||
| ivec4 z4 = gz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| ivec4 z4; | |||
| if (mode == 0) | |||
| { | |||
| z4 = gz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| } | |||
| else // if (mode == 1) | |||
| { | |||
| z4 = ((gy % upscale_factor) * upscale_factor + gx % upscale_factor) * psc(outc) * 4 + gz4; | |||
| } | |||
| #if NCNN_image_shader | |||
| afpvec4 vr = image3d_ld4(bottom_blob, ivec3(x, y, z4.r / 4)); | |||
| @@ -22,8 +22,9 @@ | |||
| #endif | |||
| layout (constant_id = 0) const int upscale_factor = 0; | |||
| layout (constant_id = 1) const int mode = 0; | |||
| #define shape_constant_id_offset 1 | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| @@ -70,8 +71,18 @@ void main() | |||
| ivec4 gz4 = gz * 4 + ivec4(0, 1, 2, 3); | |||
| ivec4 z4 = gz4 / (upscale_factor * upscale_factor); | |||
| ivec4 zi4 = gz4 % (upscale_factor * upscale_factor); | |||
| ivec4 z4; | |||
| ivec4 zi4; | |||
| if (mode == 0) | |||
| { | |||
| z4 = gz4 / (upscale_factor * upscale_factor); | |||
| zi4 = gz4 % (upscale_factor * upscale_factor); | |||
| } | |||
| else // if (mode == 1) | |||
| { | |||
| z4 = gz4 % psc(outc); | |||
| zi4 = gz4 / psc(outc); | |||
| } | |||
| ivec4 y4 = gy * upscale_factor + zi4 / upscale_factor; | |||
| ivec4 x4 = gx * upscale_factor + zi4 % upscale_factor; | |||
| @@ -23,8 +23,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| layout (constant_id = 0) const int upscale_factor = 0; | |||
| layout (constant_id = 1) const int mode = 0; | |||
| #define shape_constant_id_offset 1 | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| @@ -78,8 +79,18 @@ void main() | |||
| int x = gx / upscale_factor; | |||
| int y = gy / upscale_factor; | |||
| ivec4 z4 = gz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| ivec4 zz4 = gzz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| ivec4 z4; | |||
| ivec4 zz4; | |||
| if (mode == 0) | |||
| { | |||
| z4 = gz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| zz4 = gzz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| } | |||
| else // if (mode == 1) | |||
| { | |||
| z4 = ((gy % upscale_factor) * upscale_factor + gx % upscale_factor) * psc(outc) * 8 + gz4; | |||
| zz4 = ((gy % upscale_factor) * upscale_factor + gx % upscale_factor) * psc(outc) * 8 + gzz4; | |||
| } | |||
| #if NCNN_image_shader | |||
| afpvec8 v0 = image3d_ld8(bottom_blob, ivec3(x, y, z4.r / 8)); | |||
| @@ -23,8 +23,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| layout (constant_id = 0) const int upscale_factor = 0; | |||
| layout (constant_id = 1) const int mode = 0; | |||
| #define shape_constant_id_offset 1 | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| @@ -72,12 +73,26 @@ void main() | |||
| ivec4 gz4 = gz * 8 + ivec4(0, 1, 2, 3); | |||
| ivec4 gzz4 = gz4 + 4; | |||
| ivec4 z4 = gz4 / (upscale_factor * upscale_factor); | |||
| ivec4 zi4 = gz4 % (upscale_factor * upscale_factor); | |||
| ivec4 z4; | |||
| ivec4 zi4; | |||
| ivec4 zz4; | |||
| ivec4 zii4; | |||
| if (mode == 0) | |||
| { | |||
| z4 = gz4 / (upscale_factor * upscale_factor); | |||
| zi4 = gz4 % (upscale_factor * upscale_factor); | |||
| zz4 = gzz4 / (upscale_factor * upscale_factor); | |||
| zii4 = gzz4 % (upscale_factor * upscale_factor); | |||
| } | |||
| else // if (mode == 1) | |||
| { | |||
| z4 = gz4 % psc(outc); | |||
| zi4 = gz4 / psc(outc); | |||
| zz4 = gzz4 % psc(outc); | |||
| zii4 = gzz4 / psc(outc); | |||
| } | |||
| ivec4 y4 = gy * upscale_factor + zi4 / upscale_factor; | |||
| ivec4 x4 = gx * upscale_factor + zi4 % upscale_factor; | |||
| ivec4 zz4 = gzz4 / (upscale_factor * upscale_factor); | |||
| ivec4 zii4 = gzz4 % (upscale_factor * upscale_factor); | |||
| ivec4 yy4 = gy * upscale_factor + zii4 / upscale_factor; | |||
| ivec4 xx4 = gx * upscale_factor + zii4 % upscale_factor; | |||
| @@ -23,8 +23,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| layout (constant_id = 0) const int upscale_factor = 0; | |||
| layout (constant_id = 1) const int mode = 0; | |||
| #define shape_constant_id_offset 1 | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| @@ -77,7 +78,15 @@ void main() | |||
| int x = gx / upscale_factor; | |||
| int y = gy / upscale_factor; | |||
| ivec4 z4 = gz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| ivec4 z4; | |||
| if (mode == 0) | |||
| { | |||
| z4 = gz4 * upscale_factor * upscale_factor + (gy % upscale_factor) * upscale_factor + gx % upscale_factor; | |||
| } | |||
| else // if (mode == 1) | |||
| { | |||
| z4 = ((gy % upscale_factor) * upscale_factor + gx % upscale_factor) * psc(outc) * 4 + gz4; | |||
| } | |||
| #if NCNN_image_shader | |||
| afpvec8 v0 = image3d_ld8(bottom_blob, ivec3(x, y, z4.r / 8)); | |||
| @@ -15,17 +15,18 @@ | |||
| #include "layer/pixelshuffle.h" | |||
| #include "testutil.h" | |||
| static int test_pixelshuffle(const ncnn::Mat& a, int upscale_factor) | |||
| static int test_pixelshuffle(const ncnn::Mat& a, int upscale_factor, int mode) | |||
| { | |||
| ncnn::ParamDict pd; | |||
| pd.set(0, upscale_factor); | |||
| pd.set(1, mode); | |||
| std::vector<ncnn::Mat> weights(0); | |||
| int ret = test_layer<ncnn::PixelShuffle>("PixelShuffle", pd, weights, a); | |||
| if (ret != 0) | |||
| { | |||
| fprintf(stderr, "test_pixelshuffle failed a.dims=%d a=(%d %d %d) upscale_factor=%d\n", a.dims, a.w, a.h, a.c, upscale_factor); | |||
| fprintf(stderr, "test_pixelshuffle failed a.dims=%d a=(%d %d %d) upscale_factor=%d mode=%d\n", a.dims, a.w, a.h, a.c, upscale_factor, mode); | |||
| } | |||
| return ret; | |||
| @@ -34,18 +35,30 @@ static int test_pixelshuffle(const ncnn::Mat& a, int upscale_factor) | |||
| static int test_pixelshuffle_0() /* Runs the PixelShuffle cases for mode 0; the logical-or chain stops at the first call that returns nonzero. */ | |||
| { | |||
| return 0 | |||
| || test_pixelshuffle(RandomMat(3, 7, 1), 1) /* NOTE(review): this and the next six two-argument calls look like the pre-change side of the patch; the three-argument calls further down pass mode 0 explicitly - confirm which set is live */ | |||
| || test_pixelshuffle(RandomMat(2, 3, 4), 2) | |||
| || test_pixelshuffle(RandomMat(3, 4, 12), 2) | |||
| || test_pixelshuffle(RandomMat(2, 2, 64), 4) | |||
| || test_pixelshuffle(RandomMat(4, 4, 32), 2) | |||
| || test_pixelshuffle(RandomMat(5, 5, 48), 2) | |||
| || test_pixelshuffle(RandomMat(3, 3, 90), 3); | |||
| || test_pixelshuffle(RandomMat(3, 7, 1), 1, 0) /* arguments after the Mat are upscale_factor and mode, per the test_pixelshuffle signature */ | |||
| || test_pixelshuffle(RandomMat(2, 3, 4), 2, 0) | |||
| || test_pixelshuffle(RandomMat(3, 4, 12), 2, 0) | |||
| || test_pixelshuffle(RandomMat(2, 2, 64), 4, 0) | |||
| || test_pixelshuffle(RandomMat(4, 4, 32), 2, 0) | |||
| || test_pixelshuffle(RandomMat(5, 5, 48), 2, 0) | |||
| || test_pixelshuffle(RandomMat(3, 3, 90), 3, 0); | |||
| } | |||
| static int test_pixelshuffle_1() /* Runs the PixelShuffle cases for mode 1, using the same RandomMat arguments and upscale factors as the mode 0 calls in test_pixelshuffle_0. */ | |||
| { | |||
| return 0 | |||
| || test_pixelshuffle(RandomMat(3, 7, 1), 1, 1) /* arguments after the Mat are upscale_factor and mode; the chain stops at the first call that returns nonzero */ | |||
| || test_pixelshuffle(RandomMat(2, 3, 4), 2, 1) | |||
| || test_pixelshuffle(RandomMat(3, 4, 12), 2, 1) | |||
| || test_pixelshuffle(RandomMat(2, 2, 64), 4, 1) | |||
| || test_pixelshuffle(RandomMat(4, 4, 32), 2, 1) | |||
| || test_pixelshuffle(RandomMat(5, 5, 48), 2, 1) | |||
| || test_pixelshuffle(RandomMat(3, 3, 90), 3, 1); | |||
| } | |||
| int main() /* Test entry point: seeds through SRAND, then runs the PixelShuffle test groups; a nonzero result means a group reported a failure. */ | |||
| { | |||
| SRAND(7767517); /* fixed seed; presumably makes the RandomMat contents reproducible - confirm in testutil.h */ | |||
| return test_pixelshuffle_0(); /* NOTE(review): appears to be the pre-change line of the patch, superseded by the return below - confirm */ | |||
| return test_pixelshuffle_0() || test_pixelshuffle_1(); /* the mode 1 group runs only when the mode 0 group returned 0 */ | |||
| } | |||
| @@ -639,6 +639,10 @@ int main(int argc, char** argv) | |||
| { | |||
| fprintf(pp, "%-16s", "Deconvolution"); | |||
| } | |||
| else if (op == "tf.DepthToSpace") | |||
| { | |||
| fprintf(pp, "%-16s", "PixelShuffle"); | |||
| } | |||
| else if (op == "tf.DepthwiseConv2dNative") | |||
| { | |||
| fprintf(pp, "%-16s", "ConvolutionDepthWise"); | |||
| @@ -1236,6 +1240,12 @@ int main(int argc, char** argv) | |||
| } | |||
| } | |||
| } | |||
| else if (op == "tf.DepthToSpace") | |||
| { | |||
| int block_size = get_operation_attr_i(operation, "block_size"); | |||
| fprintf(pp, " 0=%d", block_size); | |||
| fprintf(pp, " 1=1"); // mode | |||
| } | |||
| else if (op == "tf.DepthwiseConv2dNative") | |||
| { | |||
| std::string weight_name = get_mlir_value_uniq_id(operation.getOperand(1)); | |||
| @@ -3677,6 +3677,7 @@ int NetOptimize::save(const char* parampath, const char* binpath) | |||
| ncnn::PixelShuffle* op_default = (ncnn::PixelShuffle*)layer_default; | |||
| fprintf_param_value(" 0=%d", upscale_factor) | |||
| fprintf_param_value(" 1=%d", mode) | |||
| } | |||
| else if (layer->type == "Pooling") | |||
| { | |||
| @@ -3058,15 +3058,16 @@ int main(int argc, char** argv) | |||
| else if (op == "DepthToSpace") | |||
| { | |||
| // pixelshuffle: writes the blocksize attribute as param 0 and the channel-layout mode as param 1 | |||
| int scale_factor = get_node_attr_i(node, "blocksize", 1); /* third argument is presumably the value used when the attribute is missing - confirm against get_node_attr_i */ | |||
| std::string mode = get_node_attr_s(node, "mode"); | |||
| fprintf(pp, " 0=%d", scale_factor); | |||
| if (mode == "CRD") | |||
| { | |||
| int scale_factor = get_node_attr_i(node, "blocksize", 1); | |||
| fprintf(pp, " 0=%d", scale_factor); | |||
| fprintf(pp, " 1=0"); /* CRD is written as mode 0 */ | |||
| } | |||
| else | |||
| else if (mode == "DCR") /* NOTE(review): a mode string equal to neither CRD nor DCR (including whatever get_node_attr_s yields for a missing attribute) writes no 1= entry at all; the ONNX operator spec lists DCR as the default mode, so confirm whether that case should emit 1=1 */ | |||
| { | |||
| fprintf(stderr, "Unsupported DepthToSpace mode %s!\n", mode.c_str()); | |||
| fprintf(pp, " 1=1"); /* DCR is written as mode 1 */ | |||
| } | |||
| } | |||
| else if (op == "Div") | |||