|
|
|
@@ -140,10 +140,10 @@ void SWCenter(float *dst, const float *src, const float *weight, const float *bi |
|
|
|
void ConvSWFp32(const float *input_data, const float *packed_weight, const float *bias_data, float *tmp_out_block, |
|
|
|
float *output_data, int task_id, ConvParameter *conv_param, SlidingWindowParam *slidingWindow_param) { |
|
|
|
int ic4 = slidingWindow_param->ic4_channel_ / C4NUM; |
|
|
|
int ic4_res = conv_param->input_channel_ % C4NUM; |
|
|
|
int oc4_res = conv_param->output_channel_ % C4NUM; |
|
|
|
const float *src = input_data; |
|
|
|
float *dst; |
|
|
|
if (ic4_res == 0) { |
|
|
|
if (oc4_res == 0) { |
|
|
|
dst = output_data; |
|
|
|
} else { |
|
|
|
dst = tmp_out_block; |
|
|
|
@@ -183,7 +183,7 @@ void ConvSWFp32(const float *input_data, const float *packed_weight, const float |
|
|
|
dst += slidingWindow_param->out_step_; |
|
|
|
} // batch loop |
|
|
|
// output nhwc4 |
|
|
|
if (ic4_res != 0) { |
|
|
|
if (oc4_res != 0) { |
|
|
|
PackNHWC4ToNHWCFp32(tmp_out_block, output_data, conv_param->output_batch_, |
|
|
|
conv_param->output_h_ * conv_param->output_w_, conv_param->output_channel_); |
|
|
|
} |
|
|
|
|