|
|
|
@@ -91,11 +91,11 @@ __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t |
|
|
|
out_h0_w0_c0 = (FLT4)(1.f) / ((FLT4)(1.f) + exp(-out_h0_w0_c0)); |
|
|
|
} |
|
|
|
|
|
|
|
#ifndef EXCEDD_MAX_IMAGE2D_WIDTH |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
#else |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
#endif |
|
|
|
if (OW * CO_SLICES <= MAX_IMAGE2D_WIDTH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
} else { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
__kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, |
|
|
|
@@ -172,17 +172,17 @@ __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t |
|
|
|
out_h1_w0_c0 = (FLT4)(1.f) / ((FLT4)(1.f) + exp(-out_h1_w0_c0)); |
|
|
|
} |
|
|
|
|
|
|
|
#ifndef EXCEDD_MAX_IMAGE2D_WIDTH |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
#else |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
#endif |
|
|
|
if (OW * CO_SLICES <= MAX_IMAGE2D_WIDTH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} else { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
__kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, |
|
|
|
@@ -283,27 +283,29 @@ __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t |
|
|
|
out_h1_w0_c1 = (FLT4)(1.f) / ((FLT4)(1.f) + exp(-out_h1_w0_c1)); |
|
|
|
} |
|
|
|
|
|
|
|
#ifndef EXCEDD_MAX_IMAGE2D_WIDTH |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh0), out_h0_w0_c1); |
|
|
|
if (OW * CO_SLICES <= MAX_IMAGE2D_WIDTH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh1), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
#else |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow0), out_h0_w0_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow0), out_h1_w0_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
#endif |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh0), out_h0_w0_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh1), out_h1_w0_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
} else { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow0), out_h0_w0_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow0), out_h1_w0_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
__kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, |
|
|
|
@@ -454,35 +456,37 @@ __kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t |
|
|
|
out_h1_w1_c1 = (FLT4)(1.f) / ((FLT4)(1.f) + exp(-out_h1_w1_c1)); |
|
|
|
} |
|
|
|
|
|
|
|
#ifndef EXCEDD_MAX_IMAGE2D_WIDTH |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh0), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh1), out_h1_w1_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh0), out_h0_w1_c1); |
|
|
|
if (OW * CO_SLICES <= MAX_IMAGE2D_WIDTH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh0), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh1), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh1), out_h1_w1_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh1), out_h1_w1_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
#else |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow1), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow1), out_h1_w1_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow1), out_h0_w1_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow0), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow1), out_h1_w1_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
#endif |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh0), out_h0_w1_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh1), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh1), out_h1_w1_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
} else { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow1), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow1), out_h1_w1_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow1), out_h0_w1_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow0), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow1), out_h1_w1_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
__kernel void Conv2D_H2W2C2_Img(__read_only image2d_t input, __write_only image2d_t output, |
|
|
|
@@ -640,33 +644,35 @@ __kernel void Conv2D_H2W2C2_Img(__read_only image2d_t input, __write_only image2 |
|
|
|
out_h1_w1_c1 = (FLT4)(1.f) / ((FLT4)(1.f) + exp(-out_h1_w1_c1)); |
|
|
|
} |
|
|
|
|
|
|
|
#ifndef EXCEDD_MAX_IMAGE2D_WIDTH |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh0), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh1), out_h1_w1_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh0), out_h0_w1_c1); |
|
|
|
if (OW * CO_SLICES <= MAX_IMAGE2D_WIDTH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh0), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh1), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh1), out_h1_w1_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice0, n_oh1), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice0, n_oh1), out_h1_w1_c0); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
#else |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow1), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow1), out_h1_w1_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow1), out_h0_w1_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow0), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow1), out_h1_w1_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
#endif |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh0), out_h0_w1_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(ow0 * CO_SLICES + co_slice1, n_oh1), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(ow1 * CO_SLICES + co_slice1, n_oh1), out_h1_w1_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
} else { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow0), out_h0_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh0 * OW + ow1), out_h0_w1_c0); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow0), out_h1_w0_c0); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice0, n_oh1 * OW + ow1), out_h1_w1_c0); |
|
|
|
} // end (oh1 < OH) |
|
|
|
if (co_slice1 < CO_SLICES) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow0), out_h0_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh0 * OW + ow1), out_h0_w1_c1); |
|
|
|
if (oh1 < OH) { |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow0), out_h1_w0_c1); |
|
|
|
WRITE_IMAGE(output, (int2)(co_slice1, n_oh1 * OW + ow1), out_h1_w1_c1); |
|
|
|
} // end if (oh1 < OH) |
|
|
|
} // end if (co_slice1 < CO_SLICES) |
|
|
|
} |
|
|
|
} |