diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c index 770815589d..fdbdc76a2e 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c +++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c @@ -940,6 +940,14 @@ int ElementLogicalAndInt(const int *input0, const int *input1, int *output, cons return NNACL_OK; } +int ElementLogicalAndBool(const bool *input0, const bool *input1, bool *output, const int element_size) { + int index = 0; + for (; index < element_size; index++) { + output[index] = (bool)((bool)(input0[index]) & (bool)(input1[index])); + } + return NNACL_OK; +} + int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size) { ElementSub(input0, input1, output, element_size); return ElementMul(output, output, output, element_size); diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h index e2d1ac6a28..f699fe9582 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h +++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h @@ -93,6 +93,7 @@ int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, f int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); int ElementLogicalAndInt(const int *input0, const int *input1, int *output, const int element_size); +int ElementLogicalAndBool(const bool *input0, const bool *input1, bool *output, const int element_size); int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, ArithmeticParameter *param); diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc index 98e0a4ce61..e36def21fd 100644 --- a/mindspore/lite/src/lite_kernel.cc +++ b/mindspore/lite/src/lite_kernel.cc @@ -43,9 +43,9 @@ void LiteKernel::FreeWorkspace() { } #endif bool LiteKernel::IsReady(const std::vector &scope_tensors) { - return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) { - if (IsContain(scope_tensors, kernel_in_tensor)) { - return (kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || kernel_in_tensor->ref_count() >= 1); + return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) { + if (IsContain(scope_tensors, in_tensor)) { + return in_tensor->IsReady(); } else { return true; } @@ -66,13 +66,9 @@ void LiteKernel::InitOutTensorInitRefCount() { int LiteKernel::DecOutTensorRefCount() { for (auto *tensor : this->out_tensors_) { - tensor->DecRefCount(); + tensor->set_ref_count(tensor->ref_count() - 1); if (0 >= tensor->ref_count()) { - auto ret = tensor->FreeData(); - if (0 != ret) { - MS_LOG(ERROR) << "Free tensor data failed"; - return ret; - } + tensor->FreeData(); } } return 0; @@ -81,18 +77,10 @@ int LiteKernel::DecOutTensorRefCount() { int LiteKernel::FreeInWorkTensor() const { for (auto &in_tensor : this->in_tensors_) { MS_ASSERT(in_tensor != nullptr); - if (in_tensor->IsConst() || in_tensor->IsGraphInput()) { + if (in_tensor->root_tensor() == in_tensor) { continue; } - MS_ASSERT(in_tensor->ref_count() > 0); - in_tensor->set_ref_count(in_tensor->ref_count() - 1); - if (in_tensor->ref_count() <= 0) { - auto ret = in_tensor->FreeData(); - if (0 != ret) { - MS_LOG(ERROR) << "Free tensor data failed"; - return ret; - } - } + in_tensor->DecRefCount(); } return RET_OK; } diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index 8af23f0cc2..f8bdd298e8 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -157,7 +157,7 @@ class LiteKernel { const std::vector &out_kernels() const { return this->out_kernels_; } - virtual bool IsReady(const std::vector &scope_tensors); + virtual bool IsReady(const std::vector &in_tensor); virtual void InitOutTensorInitRefCount(); diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index 7ddc0296e1..09fc2401d2 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -143,7 +143,7 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { } } } - lite::Tensor *dst_tensor = nullptr; + lite::Tensor *dst_tensor; if (TypeId(src_tensor.dataType()) == kObjectTypeTensorType) { dst_tensor = new (std::nothrow) TensorList(shape, std::vector(), src_category); } else { diff --git a/mindspore/lite/src/ops/merge.cc b/mindspore/lite/src/ops/merge.cc index 7dd0397234..a959f45d6f 100644 --- a/mindspore/lite/src/ops/merge.cc +++ b/mindspore/lite/src/ops/merge.cc @@ -18,6 +18,7 @@ #ifndef PRIMITIVE_WRITEABLE #include "src/ops/ops_register.h" #endif +#include "src/tensorlist.h" namespace mindspore { namespace lite { @@ -72,8 +73,29 @@ int Merge::InferShape(std::vector inputs_, std::vector outpu return RET_INFER_INVALID; } for (size_t i = 0; i < inputs_.size() / 2; i++) { - outputs_[i]->set_data_type(inputs_[i]->data_type()); - outputs_[i]->set_shape(inputs_[i]->shape()); + auto *input = inputs_[i]; + auto *output = outputs_[i]; + if (input == nullptr) { + MS_LOG(ERROR) << "input tensor is nullptr"; + return RET_ERROR; + } + if (output == nullptr) { + MS_LOG(ERROR) << "output tensor is nullptr"; + return RET_ERROR; + } + output->set_data_type(input->data_type()); + output->set_shape(input->shape()); + output->set_format(input->format()); + auto data_type = input->data_type(); + if (data_type != kObjectTypeTensorType) { + continue; + } else { + auto input_tensorlist = reinterpret_cast(input); + auto output_tensorlist = reinterpret_cast(output); + output_tensorlist->set_element_shape(input_tensorlist->element_shape()); + output_tensorlist->set_max_elements_num(input_tensorlist->max_elements_num()); + output_tensorlist->set_tensors_data_type(input_tensorlist->tensors_data_type()); + } } return RET_OK; } diff --git a/mindspore/lite/src/ops/populate/split_populate.cc b/mindspore/lite/src/ops/populate/split_populate.cc index 3726dd0451..9fcd931506 100644 --- a/mindspore/lite/src/ops/populate/split_populate.cc +++ b/mindspore/lite/src/ops/populate/split_populate.cc @@ -48,7 +48,6 @@ OpParameter *PopulateSplitParameter(const mindspore::lite::PrimitiveC *primitive memset(split_param->split_sizes_, 0, split_param->num_split_ * sizeof(int)); auto split_sizes_vector_ = param->size_splits(); - MS_ASSERT(split_sizes_vector_.size() == split_param->num_split_); for (size_t i = 0; i < split_sizes_vector_.size(); i++) { split_param->split_sizes_[i] = split_sizes_vector_[i]; } diff --git a/mindspore/lite/src/ops/switch.cc b/mindspore/lite/src/ops/switch.cc index 2135f5fbb7..eacbd2cf7e 100644 --- a/mindspore/lite/src/ops/switch.cc +++ b/mindspore/lite/src/ops/switch.cc @@ -19,6 +19,7 @@ #ifndef PRIMITIVE_WRITEABLE #include "src/ops/ops_register.h" #endif +#include "src/tensorlist.h" namespace mindspore { namespace lite { @@ -76,12 +77,37 @@ int Switch::InferShape(std::vector inputs_, std::vector outp return RET_INFER_INVALID; } for (size_t i = 0; i < outputs_.size() / 2; i++) { - outputs_[i]->set_data_type(inputs_[i + 1]->data_type()); - outputs_[i + outputs_.size() / 2]->set_data_type(inputs_[i + 1]->data_type()); - outputs_[i]->set_shape(inputs_[i + 1]->shape()); - outputs_[i + outputs_.size() / 2]->set_shape(inputs_[i + 1]->shape()); - outputs_[i]->set_format(inputs_[i + 1]->format()); - outputs_[i + outputs_.size() / 2]->set_format(inputs_[i + 1]->format()); + auto *input = inputs_[i + 1]; + auto *output_true = outputs_[i]; + auto *output_false = outputs_[i + outputs_.size() / 2]; + if (input == nullptr) { + MS_LOG(ERROR) << "input tensor is nullptr"; + return RET_ERROR; + } + if (output_true == nullptr || output_false == nullptr) { + MS_LOG(ERROR) << "output tensor is nullptr"; + return RET_ERROR; + } + output_true->set_data_type(input->data_type()); + output_false->set_data_type(input->data_type()); + output_true->set_shape(input->shape()); + output_false->set_shape(input->shape()); + output_true->set_format(input->format()); + output_false->set_format(input->format()); + auto data_type = input->data_type(); + if (data_type != kObjectTypeTensorType) { + continue; + } else { + auto input_tensorlist = reinterpret_cast(input); + auto output_true_tensorlist = reinterpret_cast(output_true); + auto output_false_tensorlist = reinterpret_cast(output_false); + output_true_tensorlist->set_element_shape(input_tensorlist->element_shape()); + output_false_tensorlist->set_element_shape(input_tensorlist->element_shape()); + output_true_tensorlist->set_max_elements_num(input_tensorlist->max_elements_num()); + output_false_tensorlist->set_max_elements_num(input_tensorlist->max_elements_num()); + output_true_tensorlist->set_tensors_data_type(input_tensorlist->tensors_data_type()); + output_false_tensorlist->set_tensors_data_type(input_tensorlist->tensors_data_type()); + } } return RET_OK; } diff --git a/mindspore/lite/src/ops/tensorlist_getitem.cc b/mindspore/lite/src/ops/tensorlist_getitem.cc index 5e9f8627d2..fee1c6ae45 100644 --- a/mindspore/lite/src/ops/tensorlist_getitem.cc +++ b/mindspore/lite/src/ops/tensorlist_getitem.cc @@ -136,7 +136,7 @@ int TensorListGetItem::InferShape(std::vector inputs_, std::vect MS_LOG(ERROR) << "index_:" << index_ << "must in [0, " << input0->ElementsNum() - 1 << "]"; return RET_ERROR; } - auto tensor_index = input0->GetTensorIndex(index_); + auto tensor_index = input0->GetTensor(index_); MS_ASSERT(tensor_index != nullptr); auto output = outputs_.front(); MS_ASSERT(output != nullptr); @@ -159,7 +159,7 @@ int TensorListGetItem::InferShape(std::vector inputs_, std::vect } if (!IsFullyDefined(element_shape_)) { for (int i = 0; i < input0->ElementsNum(); ++i) { - auto input = input0->GetTensorIndex(i); + auto input = input0->GetTensor(i); MS_ASSERT(input != nullptr); if (input->data_type() != kTypeUnknown) { status = MergeShape(input->shape()); diff --git a/mindspore/lite/src/ops/tensorlist_setitem.cc b/mindspore/lite/src/ops/tensorlist_setitem.cc index 34969c44ed..b753237762 100644 --- a/mindspore/lite/src/ops/tensorlist_setitem.cc +++ b/mindspore/lite/src/ops/tensorlist_setitem.cc @@ -140,7 +140,7 @@ int TensorListSetItem::InferShape(std::vector inputs_, std::vect } else { output0->set_shape(input0->shape()); for (int i = 0; i < input0->ElementsNum(); ++i) { - auto src_ptr = input0->GetTensorIndex(i); + auto src_ptr = input0->GetTensor(i); if (src_ptr == nullptr) { MS_LOG(ERROR) << "input0->tensors_[" << i << "] is nullptr!"; return RET_ERROR; diff --git a/mindspore/lite/src/ops/tensorlist_stack.cc b/mindspore/lite/src/ops/tensorlist_stack.cc index 05d46578b9..9e06b912fd 100644 --- a/mindspore/lite/src/ops/tensorlist_stack.cc +++ b/mindspore/lite/src/ops/tensorlist_stack.cc @@ -133,6 +133,7 @@ int TensorListStack::InferShape(std::vector inputs_, std::vector return RET_NULL_PTR; } auto ele_shape_ptr = reinterpret_cast(ele_shape->data_c()); + output_shape_.clear(); for (int i = 0; i < ele_shape->ElementsNum(); ++i) { output_shape_.push_back(ele_shape_ptr[i]); } @@ -148,7 +149,7 @@ int TensorListStack::InferShape(std::vector inputs_, std::vector } if (!IsFullyDefined(input0->element_shape())) { for (int i = 0; i < input0->ElementsNum(); ++i) { - auto tensor_ele = input0->GetTensorIndex(i); + auto tensor_ele = input0->GetTensor(i); MS_ASSERT(tensor_ele != nullptr); if (tensor_ele->data_type() != kTypeUnknown) { status = MergeShape(tensor_ele->shape()); diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc index 34effac772..5125e88754 100644 --- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc +++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc @@ -62,11 +62,7 @@ int NPUExecutor::Run(const std::vector &in_tensors, const std::vector< memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[index]->Size()); in_tensors[index]->set_ref_count(in_tensors[index]->ref_count() - 1); if (in_tensors[index]->ref_count() <= 0) { - auto ret = in_tensors[index]->FreeData(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Free tensor data failed"; - return RET_ERROR; - } + in_tensors[index]->FreeData(); } break; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc new file mode 100644 index 0000000000..509404c68d --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc @@ -0,0 +1,108 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/base/carry_data.h" +#include "include/errorcode.h" +#include "src/tensorlist.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { +int CarryDataKernel::MoveData(std::vector::iterator dst_begin, + std::vector::iterator dst_end, + std::vector::iterator src_begin, + std::vector::iterator src_limit) { + for (auto dst_iter = dst_begin, src_iter = src_begin; dst_iter != dst_end; dst_iter++, src_iter++) { + if (src_iter == src_limit) { + MS_LOG(ERROR) << "out of range of input tensor"; + return RET_ERROR; + } + auto *dst_tensor = *dst_iter; + auto *src_tensor = *src_iter; + if (dst_tensor == nullptr || src_tensor == nullptr) { + MS_LOG(ERROR) << "input tensor or output tensor of merge is nullptr"; + return RET_ERROR; + } + lite::STATUS ret; + if (src_tensor->data_type() == kObjectTypeTensorType && dst_tensor->data_type() == kObjectTypeTensorType) { + ret = MoveTensorLiteData(reinterpret_cast(dst_tensor), + reinterpret_cast(src_tensor)); + } else { + ret = MoveTensorData(dst_tensor, src_tensor); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << "Move data failed : " << ret; + return ret; + } + } + return RET_OK; +} + +int CarryDataKernel::MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor) { + if (dst_tensor->data_type() != src_tensor->data_type() || dst_tensor->format() != src_tensor->format() || + !(dst_tensor->shape() == src_tensor->shape() || (dst_tensor->shape().empty() && src_tensor->shape().empty()))) { + MS_LOG(ERROR) << "input tensor and output tensor is incompatible"; + return RET_ERROR; + } + if (src_tensor->root_tensor() == nullptr) { + if (src_tensor->IsConst() || src_tensor->IsGraphInput() || src_tensor->ref_count() > 1) { + auto dst_data = dst_tensor->MutableData(); + if (dst_data == nullptr) { + MS_LOG(ERROR) << "data of dst tensor is nullptr"; + return RET_ERROR; + } + auto src_data = src_tensor->data_c(); + MS_ASSERT(src_data != nullptr); + memcpy(dst_data, src_data, dst_tensor->Size()); + } else { + dst_tensor->FreeData(); + dst_tensor->set_data(src_tensor->data_c()); + src_tensor->set_data(nullptr); + } + } else { + auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor()); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed"; + return ret; + } + } + return RET_OK; +} + +int CarryDataKernel::MoveTensorLiteData(lite::TensorList *dst_tensor, lite::TensorList *src_tensor) { + // shape may change, because tensors.size() can be change in RunGraph + if (dst_tensor->data_type() != src_tensor->data_type() || dst_tensor->format() != src_tensor->format() || + !(dst_tensor->element_shape() == src_tensor->element_shape() || + (dst_tensor->element_shape().empty() && src_tensor->element_shape().empty())) || + dst_tensor->tensors_data_type() != src_tensor->tensors_data_type()) { + MS_LOG(ERROR) << "input tensorlist and output tensorlist is incompatible"; + return RET_ERROR; + } + if (src_tensor->root_tensor() == nullptr) { + dst_tensor->CopyTensorList(*src_tensor, false); + src_tensor->set_tensors({}); + } else { + dst_tensor->set_shape(src_tensor->shape()); + auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor()); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed"; + return ret; + } + } + return RET_OK; +} +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h new file mode 100644 index 0000000000..ba960b772b --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_ + +#include +#include "src/lite_kernel.h" +#include "src/tensor.h" +#include "src/tensorlist.h" + +namespace mindspore::kernel { +class CarryDataKernel : public LiteKernel { + public: + CarryDataKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + ~CarryDataKernel() override = default; + + protected: + int MoveData(std::vector::iterator dst_begin, std::vector::iterator dst_end, + std::vector::iterator src_begin, std::vector::iterator src_limit); + static int MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor); + static int MoveTensorLiteData(lite::TensorList *dst_tensor, lite::TensorList *src_tensor); +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc index 85b1486fb1..43373dcf7a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc @@ -18,6 +18,7 @@ #include "src/kernel_registry.h" #include "include/errorcode.h" #include "src/tensorlist.h" +#include "src/common/utils.h" using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; @@ -28,104 +29,97 @@ namespace mindspore::kernel { int MergeCPUKernel::FreeInWorkTensor() const { for (auto &in_tensor : this->in_tensors_) { MS_ASSERT(in_tensor != nullptr); - if (in_tensor->IsConst() || in_tensor->IsGraphInput()) { + if (in_tensor->root_tensor() == in_tensor) { continue; } - if (in_tensor->ref_count() > 0) { - in_tensor->set_ref_count(in_tensor->ref_count() - 1); - if (in_tensor->ref_count() <= 0) { - auto ret = in_tensor->FreeData(); - if (0 != ret) { - MS_LOG(ERROR) << "Free tensor data failed"; - return ret; - } - } - } + in_tensor->DecRefCount(); } return RET_OK; } -// if one of input of merge is const-tensor, merge is always ready, this will cause error. bool MergeCPUKernel::IsReady(const std::vector &scope_tensors) { - MS_ASSERT(in_tensors().size() == 2 * out_tensors().size()); - return std::all_of(this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2, - [&](lite::Tensor *kernel_in_tensor) { - return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || - kernel_in_tensor->ref_count() >= 1; - }) || - std::all_of(this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(), - [&](lite::Tensor *kernel_in_tensor) { - return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || - kernel_in_tensor->ref_count() >= 1 || - (kernel_in_tensor->data_type() == kObjectTypeTensorType); - }); + auto ready_part = FindReadyPart(scope_tensors); + return ready_part == LEFT_INPUT_PART || ready_part == RIGHT_INPUT_PART; } -int MergeCPUKernel::Init() { return RET_OK; } +int MergeCPUKernel::Init() { + MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size()); + size_t stride = in_tensors_.size() / 2; + for (size_t i = 0; i < in_tensors_.size() / 2; i++) { + MS_ASSERT(in_tensors_[i] != nullptr); + MS_ASSERT(in_tensors_[i + stride] != nullptr); + if (in_tensors_[i] == in_tensors_[i + stride]) { + auto ret = in_tensors_[i]->set_root_tensor(in_tensors_[i]); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Set root tensor for tensor(" << in_tensors_[i]->tensor_name() << ") failed"; + return ret; + } + ret = in_tensors_[i + stride]->set_root_tensor(in_tensors_[i + stride]); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Set root tensor for tensor(" << in_tensors_[i + stride]->tensor_name() << ") failed"; + return ret; + } + } + } + return RET_OK; +} int MergeCPUKernel::ReSize() { return RET_OK; } -bool MergeCPUKernel::PartialInputReady(int num_begin, int num_end) { +InputPart MergeCPUKernel::FindReadyPart(const std::vector &scope_tensors) { MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size()); - bool result = (std::all_of(this->in_tensors().begin() + num_begin, this->in_tensors().begin() + num_end, - [&](lite::Tensor *kernel_in_tensor) { - return kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1 || - kernel_in_tensor->IsGraphInput() || - kernel_in_tensor->data_type() == kObjectTypeTensorType; - })) && - std::all_of(this->in_tensors_.begin() + num_begin, this->in_tensors_.begin() + num_end, - [&](lite::Tensor *in_tensor) { - if (in_tensor->data_type() != kObjectTypeTensorType) { - return in_tensor->data_c() != nullptr; - } else { - return true; - } - }); - return result; + bool is_root_tensor_ready = + std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) { + // if not in scope_tensors, not care + if (!IsContain(scope_tensors, in_tensor)) { + return true; + } + // if not a root_tensor, not care + if (in_tensor->root_tensor() == nullptr || in_tensor->root_tensor() != in_tensor) { + return true; + } + return in_tensor->IsReady(); + }); + // check if all root tensor is ready + if (!is_root_tensor_ready) { + return UNKNOWN_INPUT_PART; + } + // check one part of in tensors of merge is ready + // if not in scope_tensors, not care + // if in scope_tensors, in_tensor need to be ready + if (std::all_of( + this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(), + [&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) { + return RIGHT_INPUT_PART; + } + if (std::all_of( + this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2, + [&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) { + return LEFT_INPUT_PART; + } + return UNKNOWN_INPUT_PART; } int MergeCPUKernel::Run() { MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size()); - int in_tesnor_part_one = 0; - int in_tensor_part_two = in_tensors_.size() / 2; - int in_tensor_part_three = in_tensors_.size(); - if (PartialInputReady(in_tesnor_part_one, in_tensor_part_two)) { - for (size_t i = 0; i < out_tensors().size(); i++) { - auto out_data = out_tensors_[i]->data_c(); - auto in_data = in_tensors_[i]->data_c(); - if (in_tensors_[i]->data_type() == kObjectTypeTensorType) { - auto in_tensor_list = reinterpret_cast(in_tensors_[i]); - auto out_tensor_list = reinterpret_cast(out_tensors_[i]); - if (std::any_of(in_tensor_list->tensors().begin(), in_tensor_list->tensors().end(), - [&](lite::Tensor *tensor) { return tensor->data_c() == nullptr; })) { - continue; - } - *out_tensor_list = *in_tensor_list; - continue; - } - MS_ASSERT(in_data != nullptr); - MS_ASSERT(out_data != nullptr); - memcpy(out_data, in_data, in_tensors_[i]->Size()); + auto ready_part = FindReadyPart(this->in_tensors_); + if (ready_part == LEFT_INPUT_PART) { + auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.end(), this->in_tensors_.begin(), + this->in_tensors_.end()); + if (ret != RET_OK) { + MS_LOG(ERROR) << "carry data error : " << ret; + return ret; } - } - if (PartialInputReady(in_tensor_part_two, in_tensor_part_three)) { - for (size_t i = 0; i < out_tensors().size(); i++) { - auto out_data = out_tensors_[i]->data_c(); - auto in_data = in_tensors_[i + in_tensor_part_two]->data_c(); - if (in_tensors_[i]->data_type() == kObjectTypeTensorType) { - auto in_tensor_list = reinterpret_cast(in_tensors_[i + in_tensor_part_two]); - auto out_tensor_list = reinterpret_cast(out_tensors_[i]); - if (std::any_of(in_tensor_list->tensors().begin(), in_tensor_list->tensors().end(), - [&](lite::Tensor *tensor) { return tensor->data_c() == nullptr; })) { - continue; - } - *out_tensor_list = *in_tensor_list; - continue; - } - MS_ASSERT(in_data != nullptr); - MS_ASSERT(out_data != nullptr); - memcpy(out_data, in_data, in_tensors_[i]->Size()); + } else if (ready_part == RIGHT_INPUT_PART) { + auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.end(), + (this->in_tensors_.begin() + in_tensors_.size() / 2), this->in_tensors_.end()); + if (ret != RET_OK) { + MS_LOG(ERROR) << "carry data error : " << ret; + return ret; } + } else { + MS_LOG(ERROR) << "none input part of merge is ready"; + return RET_ERROR; } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.h b/mindspore/lite/src/runtime/kernel/arm/base/merge.h index 481a3b9ad4..7268a1c2ba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/merge.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.h @@ -17,21 +17,28 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MERGE_H_ #include -#include "src/lite_kernel.h" +#include "src/runtime/kernel/arm/base/carry_data.h" +#include "src/tensor.h" +#include "src/tensorlist.h" namespace mindspore::kernel { -class MergeCPUKernel : public LiteKernel { +enum InputPart { UNKNOWN_INPUT_PART, LEFT_INPUT_PART, RIGHT_INPUT_PART }; + +class MergeCPUKernel : public CarryDataKernel { public: MergeCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - ~MergeCPUKernel() override {} + : CarryDataKernel(parameter, inputs, outputs, ctx, primitive) {} + bool IsReady(const std::vector &scope_tensors) override; + ~MergeCPUKernel() override = default; + int FreeInWorkTensor() const override; int Init() override; int ReSize() override; int Run() override; - int FreeInWorkTensor() const override; - bool IsReady(const std::vector &scope_tensors) override; + + private: + InputPart FindReadyPart(const std::vector &scope_tensors); private: bool PartialInputReady(int num_begin, int num_end); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/switch.cc b/mindspore/lite/src/runtime/kernel/arm/base/switch.cc index b3ecb0bfd5..cbd681eb54 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/switch.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/switch.cc @@ -43,6 +43,16 @@ int SwitchCPUKernel::PostProcess() { auto out_tensor = out_tensors_.at(out_index++); out_tensor->ResetRefCount(); } + if (!*active) { + for (auto &in_tensor : this->in_tensors_) { + MS_ASSERT(in_tensor != nullptr); + auto root_tensor = in_tensor->root_tensor(); + if (root_tensor == nullptr) { + continue; + } + root_tensor->DecRefCount(); + } + } return FreeInWorkTensor(); } @@ -64,29 +74,20 @@ int SwitchCPUKernel::Run() { MS_LOG(ERROR) << "data of bool tensor is nullptr"; return lite::RET_NULL_PTR; } - size_t in_index = 1; - size_t out_index = (*active) ? 0 : (out_tensors_.size() / 2); - while (in_index < in_tensors_.size()) { - auto in_tensor = in_tensors_.at(in_index++); - auto out_tensor = out_tensors_.at(out_index++); - // copy for tensorlist - if (in_tensor->data_type() == kObjectTypeTensorType) { - auto in_tensor_list = reinterpret_cast(in_tensor); - auto out_tensor_list = reinterpret_cast(out_tensor); - *out_tensor_list = *in_tensor_list; - continue; + if (*active) { + auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.begin() + out_tensors_.size() / 2, + this->in_tensors_.begin() + 1, this->in_tensors_.end()); + if (ret != RET_OK) { + MS_LOG(ERROR) << "carry data error : " << ret; + return ret; } - // copy for tensor - MS_ASSERT(in_tensor != nullptr); - MS_ASSERT(out_tensor != nullptr); - auto input = in_tensor->data_c(); - auto output = out_tensor->data_c(); - MS_ASSERT(in_tensor->Size() == out_tensor->Size()); - if (input == nullptr || output == nullptr) { - MS_LOG(ERROR) << "input tensor or output tensor have not been malloced"; - return lite::RET_NULL_PTR; + } else { + auto ret = MoveData(this->out_tensors_.begin() + out_tensors_.size() / 2, this->out_tensors_.end(), + this->in_tensors_.begin() + 1, this->in_tensors_.end()); + if (ret != RET_OK) { + MS_LOG(ERROR) << "carry data error : " << ret; + return ret; } - memcpy(output, input, in_tensor->Size()); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/switch.h b/mindspore/lite/src/runtime/kernel/arm/base/switch.h index 7e7530a088..66187bd416 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/switch.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/switch.h @@ -17,30 +17,22 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SWITCH_H_ #include +#include "src/runtime/kernel/arm/base/carry_data.h" #include "src/lite_kernel.h" +#include "src/tensorlist.h" namespace mindspore::kernel { - -typedef struct SwitchParameter { - OpParameter op_parameter_; -} SwitchParameter; - -class SwitchCPUKernel : public LiteKernel { +class SwitchCPUKernel : public CarryDataKernel { public: SwitchCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) { - switch_param_ = reinterpret_cast(op_parameter_); - } + : CarryDataKernel(parameter, inputs, outputs, ctx, primitive) {} ~SwitchCPUKernel() override = default; int PostProcess() override; int Init() override; int ReSize() override; int Run() override; - - private: - SwitchParameter *switch_param_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc index 2a1a17b143..5ba89af94e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc @@ -157,6 +157,7 @@ void ArithmeticCPUKernel::InitRunFunction() { case PrimitiveType_LogicalAnd: arithmetic_run_ = ElementLogicalAnd; arithmetic_run_int_ = ElementLogicalAndInt; + arithmetic_run_bool_ = ElementLogicalAndBool; break; case PrimitiveType_LogicalOr: arithmetic_run_ = ElementLogicalOr; @@ -295,6 +296,8 @@ void ArithmeticCPUKernel::InitParam() { arithmeticParameter_->ndim_ = arithmetic_lite_primitive->NDims(); if (in_tensors_[0]->data_type() == kNumberTypeFloat32 || in_tensors_[0]->data_type() == kNumberTypeFloat16) { data_type_ = kDataTypeFloat; + } else if (in_tensors_[0]->data_type() == kNumberTypeBool) { + data_type_ = KDataTypeBool; } else { data_type_ = kDataTypeInt; } @@ -419,6 +422,10 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) { error_code = arithmetic_run_(reinterpret_cast(input0_ptr_) + stride * task_id, reinterpret_cast(input1_ptr_) + stride * task_id, reinterpret_cast(out_tensors_[0]->data_c()) + stride * task_id, count); + } else if (data_type_ == KDataTypeBool) { + error_code = arithmetic_run_bool_(reinterpret_cast(input0_ptr_) + stride * task_id, + reinterpret_cast(input1_ptr_) + stride * task_id, + reinterpret_cast(out_tensors_[0]->data_c()) + stride * task_id, count); } else { error_code = arithmetic_run_int_(reinterpret_cast(input0_ptr_) + stride * task_id, reinterpret_cast(input1_ptr_) + stride * task_id, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h index 78bedefdf3..5c6ebfb8a7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h @@ -50,6 +50,7 @@ class ArithmeticCPUKernel : public LiteKernel { typedef int (*ArithmeticIntRun)(const int *input0, const int *input1, int *output, const int element_size); typedef int (*ArithmeticOptIntRun)(const int *input0, const int *input1, int *output, const int element_size, const ArithmeticParameter *param); + typedef int (*ArithmeticBoolRun)(const bool *input0, const bool *input1, bool *output, const int element_size); public: ArithmeticCPUKernel(OpParameter *parameter, const std::vector &inputs, @@ -91,6 +92,7 @@ class ArithmeticCPUKernel : public LiteKernel { ArithmeticOptRun arithmetic_opt_run_ = nullptr; ArithmeticIntRun arithmetic_run_int_ = nullptr; ArithmeticOptIntRun arithmetic_opt_run_int_ = nullptr; + ArithmeticBoolRun arithmetic_run_bool_ = nullptr; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc index 0c1f6ac174..18a65d648b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc @@ -89,7 +89,7 @@ int TensorListFromTensorCPUKernel::Run() { auto in_ptr = reinterpret_cast(input0_->data_c()); // copy data from input0(tensor) to output(tensorlist) vector<*tensor> for (int i = 0; i < dim0; ++i) { - auto out_ptr = output0->GetTensorIndex(i); + auto out_ptr = output0->GetTensor(i); MS_ASSERT(out_ptr != nullptr); if (out_ptr->ElementsNum() != devision_dim0) { MS_LOG(ERROR) << "tensors_[" << i << "].ElementsNum():" << out_ptr->ElementsNum() diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc index 7018bbed11..9f7df69baf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc @@ -49,7 +49,7 @@ int TensorListGetItemCPUKernel::Init() { int TensorListGetItemCPUKernel::Run() { auto input0 = reinterpret_cast(in_tensors_[0]); - auto src_ptr = input0->GetTensorIndex(index_); + auto src_ptr = input0->GetTensor(index_); MS_ASSERT(src_ptr != nullptr); if (src_ptr->data_type() != kTypeUnknown) { if (src_ptr->ElementsNum() != out_tensors_[0]->ElementsNum()) { @@ -57,7 +57,7 @@ int TensorListGetItemCPUKernel::Run() { << " must be equal to out_tensors_[0]->ElementsNum():" << out_tensors_[0]->ElementsNum(); return RET_ERROR; } - auto status = out_tensors_[0]->CopyTensorData(*src_ptr); + auto status = lite::Tensor::CopyTensorData(*src_ptr, out_tensors_[0]); if (status == RET_ERROR) { MS_LOG(ERROR) << "copy tensor data failed!"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc index 9ac4a54840..5a74c14c73 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc @@ -59,23 +59,41 @@ int TensorListSetItemCPUKernel::Run() { MS_ASSERT(output0_ != nullptr); // copy each tensor in tensors_ for (int i = 0; i < output0_->ElementsNum(); ++i) { - auto dst = output0_->GetTensorIndex(i); - MS_ASSERT(dst != nullptr); - auto src = input0_->GetTensorIndex(i); if (i == index_) { - // copy input2_ data buff - src = input2_; - } - MS_ASSERT(src != nullptr); - if (src->data_type() != kTypeUnknown) { - if (src->Size() != dst->Size()) { - MS_LOG(ERROR) << "src->Size():" << src->Size() << " must be equal to dst->Size():" << dst->Size(); - return RET_ERROR; + auto dst = output0_->GetTensor(i); + if (dst == nullptr) { + dst = lite::Tensor::CopyTensor(*input2_, true); + auto &tensors = output0_->tensors(); + tensors.emplace_back(dst); + } else { + dst->set_data_type(input2_->data_type()); + dst->set_shape(input2_->shape()); + dst->set_format(input2_->format()); + dst->set_category(input2_->category()); + dst->set_root_tensor(input2_->root_tensor()); + dst->set_tensor_name(input2_->tensor_name()); + dst->set_quant_clusters(input2_->quant_clusters()); + auto ret = lite::Tensor::CopyTensorData(*input2_, dst); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CopyTensorData[" << i << "] is failed!"; + return RET_ERROR; + } } - auto ret = dst->CopyTensorData(*src); - if (ret != RET_OK) { - MS_LOG(ERROR) << "CopyTensorData[" << i << "] is failed!"; - return RET_ERROR; + } else { + auto src = input0_->GetTensor(i); + auto dst = output0_->GetTensor(i); + MS_ASSERT(src != nullptr); + MS_ASSERT(dst != nullptr); + if (src->data_type() != kTypeUnknown) { + if (src->Size() != dst->Size()) { + MS_LOG(ERROR) << "src->Size():" << src->Size() << " must be equal to dst->Size():" << dst->Size(); + return RET_ERROR; + } + auto ret = lite::Tensor::CopyTensorData(*src, dst); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CopyTensorData[" << i << "] is failed!"; + return RET_ERROR; + } } } } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc index b95bb1e4bc..143e2a3801 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc @@ -79,6 +79,7 @@ int TensorListStackCPUKernel::MergeElementShape() { return RET_ERROR; } auto ele_shape_data = reinterpret_cast(in_tensors_[1]->data_c()); + output_shape_.clear(); for (int i = 0; i < in_tensors_[1]->ElementsNum(); ++i) { output_shape_.push_back(ele_shape_data[i]); } @@ -94,7 +95,7 @@ int TensorListStackCPUKernel::MergeElementShape() { } if (!IsFullyDefined(input0_->element_shape())) { for (int i = 0; i < input0_->ElementsNum(); ++i) { // get tensorlist every tensor - auto tensor_ele = input0_->GetTensorIndex(i); + auto tensor_ele = input0_->GetTensor(i); MS_ASSERT(tensor_ele != nullptr); if (tensor_ele->data_type() != kTypeUnknown) { status = MergeSubShape(tensor_ele->shape()); @@ -150,7 +151,7 @@ int TensorListStackCPUKernel::Run() { } auto out_ptr = reinterpret_cast(output0_->MutableData()); for (int i = 0; i < num_element_; ++i) { - auto in_ptr = input0_->GetTensorIndex(i); + auto in_ptr = input0_->GetTensor(i); MS_ASSERT(in_ptr != nullptr); if (in_ptr->data_type() != kTypeUnknown) { int in_size = in_ptr->ElementsNum(); diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc index ff7369d3d0..6cd3d54476 100644 --- a/mindspore/lite/src/sub_graph_kernel.cc +++ b/mindspore/lite/src/sub_graph_kernel.cc @@ -115,11 +115,7 @@ int SubGraphKernel::ReSize(bool is_interrupt) { std::vector inputs = kernel->in_tensors(); std::vector outputs = kernel->out_tensors(); for (auto &output : outputs) { - auto ret = output->FreeData(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "FreeData failed"; - return RET_ERROR; - } + output->FreeData(); } primitive->set_infer_flag(!is_interrupt); auto ret = primitive->InferShape(inputs, outputs); diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h index f91a95c173..990da11375 100644 --- a/mindspore/lite/src/sub_graph_kernel.h +++ b/mindspore/lite/src/sub_graph_kernel.h @@ -52,13 +52,13 @@ struct DataStore { class SubGraphKernel : public LiteKernel { public: - explicit SubGraphKernel(const std::vector &inputs, const std::vector &outputs, - const std::vector &in_kernels, const std::vector &out_kernels, - std::vector nodes, const lite::InnerContext *ctx) + SubGraphKernel(const std::vector &inputs, const std::vector &outputs, + std::vector in_kernels, std::vector out_kernels, + std::vector nodes, const lite::InnerContext *ctx) : LiteKernel(nullptr, inputs, outputs, ctx, nullptr), nodes_(std::move(nodes)), - in_nodes_(in_kernels), - out_nodes_(out_kernels) { + in_nodes_(std::move(in_kernels)), + out_nodes_(std::move(out_kernels)) { subgraph_type_ = kCpuFP32SubGraph; } @@ -109,20 +109,20 @@ class SubGraphKernel : public LiteKernel { std::vector nodes() { return this->nodes_; } protected: - std::vector nodes_; + std::vector nodes_{}; // entry nodes in nodes - std::vector in_nodes_; + std::vector in_nodes_{}; // exit nodes in nodes - std::vector out_nodes_; + std::vector out_nodes_{}; mindspore::lite::Executor *executor_ = nullptr; }; class CpuSubGraph : public SubGraphKernel { public: - explicit CpuSubGraph(const std::vector &inputs, const std::vector &outputs, - const std::vector &in_kernels, const std::vector &out_kernels, - const std::vector &nodes, const lite::InnerContext *ctx) - : SubGraphKernel(inputs, outputs, in_kernels, out_kernels, nodes, ctx) { + CpuSubGraph(const std::vector &inputs, const std::vector &outputs, + std::vector in_kernels, std::vector out_kernels, + std::vector nodes, const lite::InnerContext *ctx) + : SubGraphKernel(inputs, outputs, std::move(in_kernels), std::move(out_kernels), std::move(nodes), ctx) { subgraph_type_ = kCpuFP32SubGraph; } @@ -139,10 +139,10 @@ class CpuSubGraph : public SubGraphKernel { class CpuFp32SubGraph : public CpuSubGraph { public: - explicit CpuFp32SubGraph(const std::vector &inputs, const std::vector &outputs, - const std::vector &in_kernels, const std::vector &out_kernels, - const std::vector &nodes, const lite::InnerContext *ctx) - : CpuSubGraph(inputs, outputs, in_kernels, out_kernels, nodes, ctx) { + CpuFp32SubGraph(const std::vector &inputs, const std::vector &outputs, + std::vector in_kernels, std::vector out_kernels, + std::vector nodes, const lite::InnerContext *ctx) + : CpuSubGraph(inputs, outputs, std::move(in_kernels), std::move(out_kernels), std::move(nodes), ctx) { subgraph_type_ = kCpuFP32SubGraph; this->name_ = "CpuFP32SubGraph"; } @@ -159,10 +159,10 @@ class CpuFp32SubGraph : public CpuSubGraph { class CpuFp16SubGraph : public CpuSubGraph { public: - explicit CpuFp16SubGraph(const std::vector &inputs, const std::vector &outputs, - const std::vector &in_kernels, const std::vector &out_kernels, - const std::vector &nodes, const lite::InnerContext *ctx) - : CpuSubGraph(inputs, outputs, in_kernels, out_kernels, nodes, ctx) { + CpuFp16SubGraph(const std::vector &inputs, const std::vector &outputs, + std::vector in_kernels, std::vector out_kernels, + std::vector nodes, const lite::InnerContext *ctx) + : CpuSubGraph(inputs, outputs, std::move(in_kernels), std::move(out_kernels), std::move(nodes), ctx) { subgraph_type_ = kCpuFP16SubGraph; this->name_ = "CpuFP16SubGraph"; } @@ -180,7 +180,7 @@ class CpuFp16SubGraph : public CpuSubGraph { void FreeOriginInputData(); private: - std::vector origin_input_data_; + std::vector origin_input_data_{}; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_SUB_GRAPH_H diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc index 6b919093b6..da022e599e 100644 --- a/mindspore/lite/src/tensor.cc +++ b/mindspore/lite/src/tensor.cc @@ -29,48 +29,53 @@ namespace lite { Tensor::Tensor(const TypeId data_type, std::vector shape, const schema::Format &format, Category category) : data_type_(data_type), shape_(std::move(shape)), format_(format), category_(category) {} -Tensor::Tensor(const Tensor &tensor) { - auto ret = CopyTensor(tensor, true); - if (0 != ret) { - MS_LOG(ERROR) << "CopyTensorData error"; +int Tensor::CopyTensorData(const Tensor &src_tensor, Tensor *dst_tensor) { + if (dst_tensor == nullptr) { + MS_LOG(ERROR) << "dst_tensor is nullptr"; + return RET_PARAM_INVALID; } -} - -int Tensor::CopyTensorData(const Tensor &src_tensor) { if (src_tensor.data_ == nullptr) { MS_LOG(ERROR) << "data of src tensor is nullptr"; return RET_PARAM_INVALID; } - size_t data_size = this->Size(); - MS_ASSERT(data_size == src_tensor.Size()); - if (this->data_ == nullptr) { + size_t data_size = dst_tensor->Size(); + if (data_size != src_tensor.Size()) { + MS_LOG(ERROR) << "Size of dst tensor is not compatible with src tensor"; + return RET_ERROR; + } + if (dst_tensor->data_ == nullptr) { if (data_size > kMaxMallocSize) { MS_LOG(ERROR) << "Malloc size is too big while coping data, " << data_size << " bytes"; return RET_ERROR; } - this->data_ = malloc(data_size); - if (this->data_ == nullptr) { + dst_tensor->data_ = malloc(data_size); + if (dst_tensor->data_ == nullptr) { MS_LOG(ERROR) << "Malloc memory failed"; return RET_ERROR; } } - memcpy(this->data_, src_tensor.data_, data_size); + memcpy(dst_tensor->data_, src_tensor.data_, data_size); return RET_OK; } -int Tensor::CopyTensor(const Tensor &src_tensor, bool copy_data) { - this->data_type_ = src_tensor.data_type_; - this->shape_ = src_tensor.shape_; - this->category_ = src_tensor.category_; - this->format_ = src_tensor.format_; +Tensor *Tensor::CopyTensor(const Tensor &src_tensor, bool copy_data) { + auto *result = new (std::nothrow) Tensor; + if (result == nullptr) { + MS_LOG(ERROR) << "New tensor failed"; + return nullptr; + } + result->data_type_ = src_tensor.data_type_; + result->shape_ = src_tensor.shape_; + result->category_ = src_tensor.category_; + result->format_ = src_tensor.format_; if (copy_data) { - auto ret = CopyTensorData(src_tensor); - if (0 != ret) { + auto ret = CopyTensorData(src_tensor, result); + if (ret != RET_OK) { MS_LOG(ERROR) << "CopyTensorData error"; - return RET_ERROR; + return nullptr; } } - return RET_OK; + return result; } Tensor::~Tensor() { @@ -84,18 +89,6 @@ Tensor::~Tensor() { } } -Tensor &Tensor::operator=(const Tensor &tensor) { - if (&tensor == this) { - return *this; - } - auto ret = CopyTensor(tensor, true); - if (0 != ret) { - MS_LOG(ERROR) << "CopyTensorData error"; - MS_ASSERT(false); - } - return *this; -} - bool Tensor::operator==(const Tensor &tensor) { return data_ == tensor.data_ && shape_ == tensor.shape_ && data_type_ == tensor.data_type_; } @@ -283,6 +276,25 @@ std::string Tensor::ToString() const { return oss.str(); } +int Tensor::set_root_tensor(Tensor *tensor) { + this->root_tensor_ = tensor; + if (this->root_tensor_ == this) { + return RET_OK; + } + if (this->root_tensor_ == nullptr) { + MS_LOG(ERROR) << "root tensor is nullptr"; + return RET_NULL_PTR; + } + this->shape_ = this->root_tensor_->shape_; + this->format_ = this->root_tensor_->format_; + this->data_type_ = this->root_tensor_->data_type_; + this->allocator_ = this->root_tensor_->allocator_; + this->category_ = this->root_tensor_->category_; + this->quant_params_ = this->root_tensor_->quant_params_; + this->quant_clusters_ = this->root_tensor_->quant_clusters_; + return RET_OK; +} + int Tensor::MallocData(const mindspore::lite::Allocator *allocator) { if (nullptr != this->data_) { return RET_OK; @@ -303,9 +315,9 @@ int Tensor::MallocData(const mindspore::lite::Allocator *allocator) { return RET_OK; } -int Tensor::FreeData() { +void Tensor::FreeData() { if (nullptr == this->data_) { - return RET_OK; + return; } if (nullptr == allocator_) { free(this->data_); @@ -314,10 +326,19 @@ int Tensor::FreeData() { allocator_->Free(this->data_); this->data_ = nullptr; } - return RET_OK; } void *Tensor::MutableData() { + if (this->root_tensor_ != nullptr) { + if (this->root_tensor_ != this && this->root_tensor_->data_ == nullptr) { + MS_LOG(ERROR) << "root tensor has not been malloced"; + return nullptr; + } else if (this->root_tensor_ != this && this->root_tensor_->data_ != nullptr) { + return this->root_tensor_->data_; + } else { + // malloc self + } + } if (this->data_ == nullptr) { auto ret = this->MallocData(); if (ret != 0) { @@ -328,6 +349,17 @@ void *Tensor::MutableData() { return this->data_; } +void Tensor::DecRefCount() { + if (this->IsConst() || this->IsGraphInput()) { + return; + } + this->ref_count_--; + if (this->ref_count_ <= 0) { + FreeData(); + this->ref_count_ = 0; + } +} + void Tensor::AddQuantParam(const QuantArg &quant_arg) { this->quant_params_.push_back(quant_arg); } std::vector Tensor::quant_params() const { return this->quant_params_; } diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index b67962fc43..69e44494ad 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -53,15 +53,19 @@ class Tensor : public mindspore::tensor::MSTensor { Tensor(TypeId data_type, std::vector shape, const schema::Format &format = schema::Format::Format_NHWC, Category category = VAR); - Tensor(const Tensor &tensor); + Tensor(const Tensor &tensor) = delete; - ~Tensor() override; + Tensor(Tensor &&other) = delete; + + Tensor &operator=(const Tensor &tensor) = delete; - int CopyTensorData(const Tensor &srcTensor); + Tensor &operator=(Tensor &&src) = delete; - int CopyTensor(const Tensor &srcTensor, bool copyData = false); + ~Tensor() override; - Tensor &operator=(const Tensor &tensor); + static int CopyTensorData(const Tensor &src_tensor, Tensor *dst_tensor); + + static Tensor *CopyTensor(const Tensor &src_tensor, bool copy_data = false); virtual bool operator==(const Tensor &tensor); @@ -99,11 +103,16 @@ class Tensor : public mindspore::tensor::MSTensor { virtual int MallocData(const mindspore::lite::Allocator *allocator = nullptr); - virtual int FreeData(); + virtual void FreeData(); void *MutableData() override; - virtual void *data_c() const { return data_; } + virtual void *data_c() const { + if (this->root_tensor_ != nullptr) { + return this->root_tensor_->data_; + } + return data_; + } virtual void set_data(void *data) { this->data_ = data; } @@ -125,7 +134,7 @@ class Tensor : public mindspore::tensor::MSTensor { void ResetRefCount() { this->ref_count_ = this->init_ref_count_; } - void DecRefCount() { this->ref_count_--; } + void DecRefCount(); std::string ToString() const; @@ -151,6 +160,14 @@ class Tensor : public mindspore::tensor::MSTensor { } } + virtual int set_root_tensor(Tensor *tensor); + + Tensor *root_tensor() const { return this->root_tensor_; } + + bool IsReady() const { + return this->IsConst() || (this->IsGraphInput() && this->data_ != nullptr) || this->ref_count_ >= 1; + } + private: template std::string DataToString(void *data, size_t data_number) const { @@ -168,7 +185,6 @@ class Tensor : public mindspore::tensor::MSTensor { protected: std::string tensor_name_; void *data_ = nullptr; - void *device_data_ = nullptr; TypeId data_type_; std::vector shape_; schema::Format format_; @@ -178,6 +194,7 @@ class Tensor : public mindspore::tensor::MSTensor { std::vector quant_params_; std::vector quant_clusters_; mindspore::lite::Allocator *allocator_ = nullptr; + Tensor *root_tensor_ = nullptr; }; inline size_t DataTypeSize(const TypeId type) { diff --git a/mindspore/lite/src/tensorlist.cc b/mindspore/lite/src/tensorlist.cc index 85dc1dbdc5..1b57a60112 100644 --- a/mindspore/lite/src/tensorlist.cc +++ b/mindspore/lite/src/tensorlist.cc @@ -14,38 +14,26 @@ * limitations under the License. */ +#include "src/tensorlist.h" +#include #include "include/ms_tensor.h" - #include "src/common/log_adapter.h" #include "schema/model_generated.h" #include "src/tensor.h" -#include "src/tensorlist.h" -namespace mindspore { -namespace lite { +namespace mindspore::lite { TensorList::TensorList(std::vector shape, std::vector element_shape, Category category) - : Tensor(kObjectTypeTensorType, shape, schema::Format::Format_NHWC, category), element_shape_(element_shape) {} + : Tensor(kObjectTypeTensorType, std::move(shape), schema::Format::Format_NHWC, category), + element_shape_(std::move(element_shape)) {} TensorList::~TensorList() { if (!this->tensors_.empty()) { - this->FreeData(); + this->TensorList::FreeData(); this->FreeTensorListData(); } } -TensorList &TensorList::operator=(const TensorList &src) { - if (&src == this) { - return *this; - } - auto ret = CopyTensorList(src, true); - if (ret == RET_ERROR) { - MS_LOG(ERROR) << "CopyTensorList error!"; - MS_ASSERT(false); - } - return *this; -} - int TensorList::CopyTensorList(const TensorList &src, bool copy_data) { this->data_type_ = src.data_type_; this->tensors_data_type_ = src.tensors_data_type_; @@ -59,6 +47,10 @@ int TensorList::CopyTensorList(const TensorList &src, bool copy_data) { return RET_ERROR; } } else { + for (auto tensor : this->tensors()) { + delete tensor; + } + this->tensors_.clear(); // each tensor in tensors_ will share the same memory space. this->tensors_ = src.tensors_; } @@ -69,17 +61,20 @@ int TensorList::CopyTensorData(const TensorList &src) { if (src.tensors_.empty()) { return RET_OK; } + for (auto tensor : this->tensors()) { + delete tensor; + } + this->tensors_.clear(); for (int i = 0; i < this->ElementsNum(); ++i) { if (src.tensors_[i] == nullptr) { MS_LOG(ERROR) << "src tensors_[" << i << "] is nullptr!"; return RET_ERROR; } - auto dst_tensor = new (std::nothrow) Tensor; + auto dst_tensor = Tensor::CopyTensor(*src.tensors_[i]); if (dst_tensor == nullptr) { MS_LOG(ERROR) << "CopyTensorData: new tensor[" << i << "] is failed!"; return RET_ERROR; } - *reinterpret_cast(dst_tensor) = *src.tensors_[i]; this->tensors_.push_back(dst_tensor); } return RET_OK; @@ -143,17 +138,11 @@ int TensorList::MallocData(const mindspore::lite::Allocator *allocator) { return RET_OK; } -int TensorList::FreeData() { +void TensorList::FreeData() { // free data buf of each tensor in tensors_ - if (this->tensors_.empty()) { - return RET_OK; + for (auto tensor : tensors_) { + tensor->FreeData(); } - for (int i = 0; i < this->ElementsNum(); ++i) { - if (this->tensors_[i] != nullptr) { - this->tensors_[i]->FreeData(); - } - } - return RET_OK; } int TensorList::FreeTensorListData() { @@ -171,7 +160,7 @@ int TensorList::FreeTensorListData() { return RET_OK; } -int TensorList::SetTensorIndex(int index, Tensor *src_tensor) { +int TensorList::SetTensor(int index, Tensor *src_tensor) { // your can use this fun to modify tensor[index] value if (src_tensor->data_type() != this->tensors_data_type_) { MS_LOG(ERROR) << "src_tensor->data_type():" << src_tensor->data_type() @@ -183,15 +172,13 @@ int TensorList::SetTensorIndex(int index, Tensor *src_tensor) { return RET_ERROR; } auto dst_tensor = this->tensors_[index]; - if (dst_tensor != nullptr) { // free original tensor data - delete dst_tensor; - } - this->tensors_[index] = new (std::nothrow) Tensor; + // free original tensor data + delete dst_tensor; + this->tensors_[index] = Tensor::CopyTensor(*src_tensor); if (this->tensors_[index] == nullptr) { - MS_LOG(ERROR) << "SetTensorIndex: new tensor is failed!"; + MS_LOG(ERROR) << "SetTensor: new tensor is failed!"; return RET_ERROR; } - *this->tensors_[index] = *src_tensor; return RET_OK; } @@ -211,9 +198,40 @@ int TensorList::CheckTensorListParam() { return RET_OK; } -Tensor *TensorList::GetTensorIndex(int index) { +int TensorList::set_root_tensor(Tensor *tensor) { + auto ret = Tensor::set_root_tensor(tensor); + if (ret != RET_OK) { + return ret; + } + if (this->data_type_ != kObjectTypeTensorType) { + return RET_OK; + } + auto root_tensorlist = reinterpret_cast(this->root_tensor_); + if (root_tensorlist == nullptr) { + MS_LOG(ERROR) << "root_tensor of tensorlist should be a tensorlist"; + return RET_INFER_INVALID; + } + this->element_shape_ = root_tensorlist->element_shape_; + this->max_elements_num_ = root_tensorlist->max_elements_num_; + this->tensors_data_type_ = root_tensorlist->tensors_data_type_; + return RET_OK; +} + +Tensor *TensorList::GetTensor(int index) { // return tensor[index] ptr. With this function, you can modify tensors_[index] at will. - if (index < 0 || index >= static_cast(tensors_.size())) { + if (this->root_tensor_ != nullptr) { + if (this->data_type_ != kObjectTypeTensorType) { + MS_LOG(ERROR) << "root_tensor of tensorlist should be a tensorlist"; + return nullptr; + } + auto root_tensorlist = reinterpret_cast(this->root_tensor_); + if (index < 0 || index >= static_cast(root_tensorlist->tensors_.size())) { + MS_LOG(ERROR) << "index:" << index << " must in [0, " << this->ElementsNum() - 1 << "]!"; + return nullptr; + } + return root_tensorlist->tensors_[index]; + } + if (index < 0 || index >= static_cast(this->tensors_.size())) { MS_LOG(ERROR) << "index:" << index << " must in [0, " << this->ElementsNum() - 1 << "]!"; return nullptr; } @@ -264,5 +282,4 @@ STATUS TensorList::Decode(const int *data) { bool TensorList::IsConst() const { return this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR; } -} // namespace lite -} // namespace mindspore +} // namespace mindspore::lite diff --git a/mindspore/lite/src/tensorlist.h b/mindspore/lite/src/tensorlist.h index 1291c3c80d..e8529ffafe 100644 --- a/mindspore/lite/src/tensorlist.h +++ b/mindspore/lite/src/tensorlist.h @@ -25,8 +25,7 @@ #include "schema/model_generated.h" #include "src/tensor.h" -namespace mindspore { -namespace lite { +namespace mindspore::lite { /** * Tensorlist is a container of vector, in which each element is a tensor object. * Member objects: @@ -64,17 +63,9 @@ class TensorList : public Tensor { ~TensorList() override; - // **Note**: This is a shallow copy, src and dst tensorlist share one memory space of each tensor in tensors_ - // If your want to not share one memory space please use "operator=" - TensorList(const TensorList &other) - : Tensor(other.data_type_, other.shape()), - tensors_(other.tensors_), - tensors_data_type_(other.tensors_data_type_), - element_shape_(other.element_shape_), - max_elements_num_(other.max_elements_num_) {} + TensorList(const TensorList &other) = delete; - // tensorlist deep copy memory - TensorList &operator=(const TensorList &tl); + TensorList &operator=(const TensorList &tl) = delete; void set_element_shape(const std::vector &shape) { element_shape_ = shape; } @@ -90,15 +81,15 @@ class TensorList : public Tensor { int FreeTensorListData(); - int FreeData() override; + void FreeData() override; int CopyTensorList(const TensorList &src, bool copy_data); int CopyTensorData(const TensorList &src); - int SetTensorIndex(int index, Tensor *); + int SetTensor(int index, Tensor *src_tensor); - Tensor *GetTensorIndex(int index); + Tensor *GetTensor(int index); void set_tensors_data_type(TypeId type) { tensors_data_type_ = type; } @@ -106,6 +97,8 @@ class TensorList : public Tensor { std::vector &tensors() { return tensors_; } + void set_tensors(const std::vector &tensors) { this->tensors_ = tensors; } + int CheckTensorListParam(); bool IsCompatibleShape(const std::vector &shape); @@ -116,18 +109,19 @@ class TensorList : public Tensor { bool IsConst() const override; + int set_root_tensor(Tensor *tensor) override; + protected: // The following functions must be masked. - void set_data(void *data) override { return; } + void set_data(void *data) override {} void *data_c() const override { return nullptr; } void *MutableData() override { return nullptr; } size_t Size() const override { return 0; } - std::vector tensors_; - TypeId tensors_data_type_; - std::vector element_shape_; + std::vector tensors_{}; + TypeId tensors_data_type_ = kTypeUnknown; + std::vector element_shape_{}; int max_elements_num_ = -1; }; -} // namespace lite -} // namespace mindspore +} // namespace mindspore::lite #endif // MINDSPORE_LITE_SRC_TENSORLIST_H_ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc index 57c7ece607..484175ac6f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc @@ -43,23 +43,23 @@ void TestMain(const std::vector &input_infos, std::tupleCompileGraph() -> ConvertTensors() MS_LOG(DEBUG) << "create Tensors & init weight data"; - std::vector tensors; + std::vector> tensors; // firstly, create all Tensors tensors.reserve(input_infos.size()); // vector's capacity() is 0, so call reserve() avoiding vector re-malloc for (auto input_info : input_infos) { auto &shape = std::get<0>(input_info); auto category = std::get<2>(input_info); auto data_type = std::get<3>(input_info); - tensors.emplace_back(data_type, shape, Format_NHWC, category); + tensors.emplace_back(std::make_shared(data_type, shape, Format_NHWC, category)); } // secondly, init weight Tensor's data std::vector kernel_inputs; std::vector subgraph_inputs; std::map subgraph_inputs_data; for (int i = 0; i < tensors.size(); ++i) { - auto *tensor = &tensors[i]; + auto tensor = tensors[i]; auto *input_data = std::get<1>(input_infos[i]); - kernel_inputs.push_back(tensor); + kernel_inputs.push_back(tensor.get()); if (tensor->category() != VAR) { // tensor is weight // simulating src/lite_session.cc:WeightTensorNeedCopy() if (packed_op.count(primitive_type)) { @@ -69,8 +69,8 @@ void TestMain(const std::vector &input_infos, std::tupledata_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32); - subgraph_inputs.push_back(tensor); - subgraph_inputs_data[tensor] = reinterpret_cast(input_data); + subgraph_inputs.push_back(tensor.get()); + subgraph_inputs_data[tensor.get()] = reinterpret_cast(input_data); } } @@ -115,7 +115,7 @@ void TestMain(const std::vector &input_infos, std::tupleFree(), clear weight data in input_infos std::vector> saved_weights; for (int i = 0; i < tensors.size(); ++i) { - auto *tensor = &tensors[i]; + auto &tensor = tensors[i]; if (tensor->category() != VAR) { saved_weights.emplace_back(new uint8_t[tensor->Size()]); auto *weight_data = std::get<1>(input_infos[i]); @@ -143,12 +143,12 @@ void TestMain(const std::vector &input_infos, std::tuplecategory() != VAR && packed_op.count(primitive_type)) { + tensor->set_data(nullptr); } } for (int i = 0, j = 0; i < tensors.size(); ++i) { // resume weight data to input_infos - auto *tensor = &tensors[i]; + auto &tensor = tensors[i]; if (tensor->category() != VAR) { auto *weight_data = std::get<1>(input_infos[i]); memcpy(weight_data, saved_weights[j++].get(), tensor->Size());