| @@ -210,12 +210,14 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||
| int ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, | |||
| DECONV_WINOGRAD_DEFAULT_UNIT, unit->h_size_); | |||
| if (ret != NNACL_OK) { | |||
| free(current_unit_weight); | |||
| return NNACL_ERRCODE_WINOGRAD_GENERATOR_ERROR; | |||
| } | |||
| /* winograd AT */ | |||
| unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float16_t)); | |||
| if (unit->winograd_.AT_ == NULL) { | |||
| free(current_unit_weight); | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| Float32ToFloat16(matrix_at, unit->winograd_.AT_, unit->winograd_.i_ * unit->winograd_.o_); | |||
| @@ -223,6 +225,8 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||
| /* winograd BT */ | |||
| unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float16_t)); | |||
| if (unit->winograd_.BT_ == NULL) { | |||
| free(current_unit_weight); | |||
| free(unit->winograd_.AT_); | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| Float32ToFloat16(matrix_bt, unit->winograd_.BT_, unit->winograd_.o_ * unit->winograd_.o_); | |||
| @@ -231,6 +235,9 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||
| size = conv_param->input_channel_ * output_channel * unit->winograd_.kh_ * unit->winograd_.kw_; | |||
| float16_t *winograd_unit_weight = (float16_t *)malloc(size * sizeof(float16_t)); | |||
| if (winograd_unit_weight == NULL) { | |||
| free(current_unit_weight); | |||
| free(unit->winograd_.AT_); | |||
| free(unit->winograd_.BT_); | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -49,12 +49,14 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| int ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, | |||
| DECONV_WINOGRAD_DEFAULT_UNIT, unit->h_size_); | |||
| if (ret != NNACL_OK) { | |||
| free(current_unit_weight); | |||
| return NNACL_ERRCODE_WINOGRAD_GENERATOR_ERROR; | |||
| } | |||
| /* winograd AT */ | |||
| unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | |||
| if (unit->winograd_.AT_ == NULL) { | |||
| free(current_unit_weight); | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| memcpy(unit->winograd_.AT_, matrix_at, unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | |||
| @@ -62,6 +64,8 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| /* winograd BT */ | |||
| unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | |||
| if (unit->winograd_.BT_ == NULL) { | |||
| free(current_unit_weight); | |||
| free(unit->winograd_.AT_); | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| memcpy(unit->winograd_.BT_, matrix_bt, unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | |||
| @@ -70,6 +74,9 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| size = conv_param->input_channel_ * conv_param->output_channel_ * unit->winograd_.kh_ * unit->winograd_.kw_; | |||
| float *winograd_unit_weight = (float *)malloc(size * sizeof(float)); | |||
| if (winograd_unit_weight == NULL) { | |||
| free(current_unit_weight); | |||
| free(unit->winograd_.AT_); | |||
| free(unit->winograd_.BT_); | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| WinogradWeightTransform(current_unit_weight, winograd_unit_weight, matrix_g, matrix_gt, C4NUM, unit->winograd_.kh_, | |||
| @@ -89,9 +89,14 @@ int DetectionPostProcessBaseCPUKernel::Run() { | |||
| if (parameter->use_regular_nms_) { | |||
| parameter->score_with_class_all_ = | |||
| context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex)); | |||
| if (parameter->score_with_class_all_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc parameter->score_with_class_all_failed."; | |||
| return RET_ERROR; | |||
| } | |||
| parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int)); | |||
| if (!parameter->score_with_class_all_ || !parameter->indexes_) { | |||
| MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ || parameter->indexes_ failed."; | |||
| if (parameter->indexes_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc parameter->indexes_ failed."; | |||
| context_->allocator->Free(parameter->score_with_class_all_); | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| @@ -31,11 +31,76 @@ DeConvWinogradFp16CPUKernel::~DeConvWinogradFp16CPUKernel() { | |||
| return; | |||
| } | |||
| void DeConvWinogradFp16CPUKernel::FreeResizeBuf() { return; } | |||
| void DeConvWinogradFp16CPUKernel::FreeResizeBuf() { | |||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | |||
| if (unit.tmp_buffer_ != nullptr) { | |||
| free(unit.tmp_buffer_); | |||
| unit.tmp_buffer_ = nullptr; | |||
| } | |||
| if (unit.use_winograd_) { | |||
| if (unit.winograd_.b_buffer_ != nullptr) { | |||
| free(unit.winograd_.b_buffer_); | |||
| unit.winograd_.b_buffer_ = nullptr; | |||
| } | |||
| } | |||
| } | |||
| for (int i = 0; i < DECONV_WINOGRAD_BUFFER_COUNT; i++) { | |||
| DeConvWgABuffer &wg = deconv_param_->a_buffer_[i]; | |||
| if (wg.buf_init_) { | |||
| if (wg.dest_buffer_ != nullptr) { | |||
| free(wg.dest_buffer_); | |||
| wg.dest_buffer_ = nullptr; | |||
| } | |||
| if (wg.middle_buffer_ != nullptr) { | |||
| free(wg.middle_buffer_); | |||
| wg.middle_buffer_ = nullptr; | |||
| } | |||
| } | |||
| wg.buf_init_ = false; | |||
| } | |||
| if (tile_input_ != nullptr) { | |||
| free(tile_input_); | |||
| tile_input_ = nullptr; | |||
| } | |||
| if (tile_output_ != nullptr) { | |||
| free(tile_output_); | |||
| tile_output_ = nullptr; | |||
| } | |||
| if (nc4hw4_output_ != nullptr) { | |||
| free(nc4hw4_output_); | |||
| nc4hw4_output_ = nullptr; | |||
| } | |||
| return; | |||
| } | |||
| void DeConvWinogradFp16CPUKernel::FreeDeconvParam() { | |||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | |||
| if (unit.weight_ != nullptr) { | |||
| free(unit.weight_); | |||
| unit.weight_ = nullptr; | |||
| } | |||
| if (unit.use_winograd_) { | |||
| if (unit.winograd_.AT_ != nullptr) { | |||
| free(unit.winograd_.AT_); | |||
| unit.winograd_.AT_ = nullptr; | |||
| } | |||
| if (unit.winograd_.BT_ != nullptr) { | |||
| free(unit.winograd_.BT_); | |||
| unit.winograd_.BT_ = nullptr; | |||
| } | |||
| } | |||
| } | |||
| if (deconv_param_ != nullptr) { | |||
| delete deconv_param_; | |||
| delete (deconv_param_); | |||
| deconv_param_ = nullptr; | |||
| } | |||
| return; | |||
| @@ -47,6 +112,9 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||
| nc4hw4_output_ = | |||
| reinterpret_cast<float16_t *>(malloc(deconv_param_->oc_up4_ * deconv_param_->output_plane_ * sizeof(float16_t))); | |||
| if (nc4hw4_output_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| deconv_param_->in_tile_w_count_ = UP_DIV(conv_param_->input_w_, DECONV_WINOGRAD_DEFAULT_UNIT); | |||
| deconv_param_->in_tile_h_count_ = UP_DIV(conv_param_->input_h_, DECONV_WINOGRAD_DEFAULT_UNIT); | |||
| @@ -62,6 +130,9 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||
| int size = deconv_param_->thread_num_ * DECONV_WINOGRAD_DEFAULT_UNIT * DECONV_WINOGRAD_DEFAULT_UNIT * | |||
| DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | |||
| tile_input_ = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t))); | |||
| if (tile_input_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| memset(tile_input_, 0, size * sizeof(float16_t)); | |||
| deconv_param_->out_tile_w_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_w_ + conv_param_->kernel_w_; | |||
| @@ -69,6 +140,9 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||
| size = deconv_param_->thread_num_ * deconv_param_->out_tile_w_ * deconv_param_->out_tile_h_ * | |||
| DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->oc_up4_; | |||
| tile_output_ = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t))); | |||
| if (tile_output_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | |||
| @@ -79,18 +153,33 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||
| size = unit.winograd_.kh_ * unit.winograd_.kw_ * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | |||
| deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ = | |||
| malloc(deconv_param_->thread_num_ * size * sizeof(float16_t)); | |||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ = | |||
| malloc(deconv_param_->thread_num_ * size * sizeof(float16_t)); | |||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| } | |||
| unit.winograd_.b_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | |||
| deconv_param_->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE * sizeof(float16_t)); | |||
| if (unit.winograd_.b_buffer_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | |||
| deconv_param_->oc_div4_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float16_t)); | |||
| if (unit.tmp_buffer_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| } else { | |||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * deconv_param_->oc_div4_ * unit.w_size_ * unit.h_size_ * | |||
| DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float16_t)); | |||
| if (unit.tmp_buffer_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| } | |||
| } | |||
| @@ -204,9 +293,15 @@ int DeConvWinogradFp16CPUKernel::InitComputeParam() { | |||
| unit.winograd_.b_buffer_ = nullptr; | |||
| unit.weight_ = malloc(unit.winograd_.kh_ * unit.winograd_.kw_ * deconv_param_->oc_up4_ * | |||
| deconv_param_->ic_up4_ * sizeof(float16_t)); | |||
| if (unit.weight_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| } else { | |||
| unit.use_winograd_ = false; | |||
| unit.weight_ = malloc(h_size * w_size * deconv_param_->ic_up4_ * deconv_param_->oc_up4_ * sizeof(float16_t)); | |||
| if (unit.weight_ == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| } | |||
| unit.tmp_buffer_ = nullptr; | |||
| deconv_param_->compute_units_[cur_count] = unit; | |||
| @@ -226,7 +321,7 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() { | |||
| } | |||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||
| DeConvComputeUnit *unit = &deconv_param_->compute_units_[i]; | |||
| ret = PackDeConvWgDataFp16(fp16_weight_, unit, conv_param_, deconv_param_); | |||
| ret = PackDeConvWgDataFp16(execute_weight_, unit, conv_param_, deconv_param_); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| @@ -113,6 +113,10 @@ int DeConvolutionWinogradCPUKernel::InitParameter() { | |||
| int size = deconv_param_->thread_num_ * DECONV_WINOGRAD_DEFAULT_UNIT * DECONV_WINOGRAD_DEFAULT_UNIT * | |||
| DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | |||
| tile_input_ = reinterpret_cast<float *>(malloc(size * sizeof(float))); | |||
| if (tile_input_ == nullptr) { | |||
| MS_LOG(ERROR) << "tile_input_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| memset(tile_input_, 0, size * sizeof(float)); | |||
| deconv_param_->out_tile_w_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_w_ + conv_param_->kernel_w_; | |||
| @@ -127,18 +131,38 @@ int DeConvolutionWinogradCPUKernel::InitParameter() { | |||
| size = unit.winograd_.kh_ * unit.winograd_.kw_ * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | |||
| deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ = | |||
| malloc(deconv_param_->thread_num_ * size * sizeof(float)); | |||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ == nullptr) { | |||
| MS_LOG(ERROR) << "middle_buffer_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ = | |||
| malloc(deconv_param_->thread_num_ * size * sizeof(float)); | |||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ == nullptr) { | |||
| MS_LOG(ERROR) << "dest_buffer_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| } | |||
| unit.winograd_.b_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | |||
| deconv_param_->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE * sizeof(float)); | |||
| if (unit.winograd_.b_buffer_ == nullptr) { | |||
| MS_LOG(ERROR) << "b_buffer_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | |||
| deconv_param_->oc_div4_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float)); | |||
| if (unit.tmp_buffer_ == nullptr) { | |||
| MS_LOG(ERROR) << "tmp_buffer_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| } else { | |||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * deconv_param_->oc_div4_ * unit.w_size_ * unit.h_size_ * | |||
| DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float)); | |||
| if (unit.tmp_buffer_ == nullptr) { | |||
| MS_LOG(ERROR) << "tmp_buffer_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| } | |||
| } | |||
| @@ -217,9 +241,17 @@ int DeConvolutionWinogradCPUKernel::InitComputeParam() { | |||
| unit.winograd_.b_buffer_ = nullptr; | |||
| unit.weight_ = malloc(unit.winograd_.kh_ * unit.winograd_.kw_ * deconv_param_->oc_up4_ * | |||
| deconv_param_->ic_up4_ * sizeof(float)); | |||
| if (unit.weight_ == nullptr) { | |||
| MS_LOG(ERROR) << "weight_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| } else { | |||
| unit.use_winograd_ = false; | |||
| unit.weight_ = malloc(h_size * w_size * deconv_param_->ic_up4_ * deconv_param_->oc_up4_ * sizeof(float)); | |||
| if (unit.weight_ == nullptr) { | |||
| MS_LOG(ERROR) << "weight_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| } | |||
| unit.tmp_buffer_ = nullptr; | |||
| deconv_param_->compute_units_[cur_count] = unit; | |||
| @@ -244,6 +276,10 @@ int DeConvolutionWinogradCPUKernel::InitDataParam() { | |||
| /* bias */ | |||
| bias_data_ = malloc(deconv_param_->oc_up4_ * sizeof(float)); | |||
| if (bias_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "bias_data_ error!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| memset(bias_data_, 0, deconv_param_->oc_up4_ * sizeof(float)); | |||
| if (in_tensors_.size() == 3) { | |||
| auto bias_tensor = in_tensors_.at(kBiasIndex); | |||
| @@ -28,7 +28,7 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Conv2D; | |||
| namespace mindspore::kernel { | |||
| void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) { | |||
| int ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) { | |||
| auto input_channel = conv_param->input_channel_; | |||
| auto output_channel = conv_param->output_channel_; | |||
| auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; | |||
| @@ -36,11 +36,15 @@ void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParamete | |||
| size_t tmp_size = output_channel * iC8 * C8NUM * kernel_plane * sizeof(int16_t); | |||
| auto tmp_addr = reinterpret_cast<int16_t *>(malloc(tmp_size)); | |||
| if (tmp_addr == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| memset(tmp_addr, 0, tmp_size); | |||
| PackWeightToC8Int8(origin_weight, tmp_addr, conv_param); | |||
| Conv3x3Int8FilterTransform(tmp_addr, dst_weight, iC8, output_channel, kernel_plane); | |||
| free(tmp_addr); | |||
| return RET_OK; | |||
| } | |||
| void Convolution3x3Int8CPUKernel::FreeTmpBuffer() { | |||
| @@ -91,7 +95,11 @@ int Convolution3x3Int8CPUKernel::InitWeightBias() { | |||
| } | |||
| memset(transformed_filter_addr_, 0, transformed_size); | |||
| auto weight_data = reinterpret_cast<int8_t *>(in_tensors_.at(kWeightIndex)->MutableData()); | |||
| ProcessFilterUint8(weight_data, transformed_filter_addr_, conv_param_); | |||
| auto ret = ProcessFilterUint8(weight_data, transformed_filter_addr_, conv_param_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ProcessFilterUint8 failed."; | |||
| return ret; | |||
| } | |||
| // init bias | |||
| size_t new_bias_size = oC4 * C4NUM * sizeof(int32_t); | |||
| @@ -48,7 +48,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| int32_t *tmp_dst_buffer_ = nullptr; | |||
| int8_t *tmp_out_ = nullptr; | |||
| }; | |||
| void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); | |||
| int ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_ | |||
| @@ -110,9 +110,14 @@ int MulInt8CPUKernel::Run() { | |||
| count_unit_ = thread_count_ > 1 ? UP_DIV(elements_num_, thread_count_) : elements_num_; | |||
| if (in_tensors_.at(0)->ElementsNum() != in_tensors_.at(1)->ElementsNum()) { | |||
| input0_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||
| if (input0_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc input0_data_ failed."; | |||
| return RET_ERROR; | |||
| } | |||
| input1_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||
| if (!input0_data_ || !input1_data_) { | |||
| MS_LOG(ERROR) << "malloc input0_data_ || input1_data_ failed."; | |||
| if (input1_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc input1_data_ failed."; | |||
| ctx_->allocator->Free(input0_data_); | |||
| return RET_ERROR; | |||
| } | |||
| TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->MutableData()), | |||