Merge pull request !3977 from nhussain/multi_dim_concat_2tags/v0.7.0-beta
| @@ -526,16 +526,34 @@ Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_ | |||
| return Status::OK(); | |||
| } | |||
| Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor) { | |||
| Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor, | |||
| const bool partial_insert) { | |||
| std::string err_msg; | |||
| err_msg += (this->type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : ""; | |||
| err_msg += (!this->shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; | |||
| err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : ""; | |||
| err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; | |||
| if (partial_insert) { | |||
| err_msg += (ind.size() != 1) | |||
| ? "[Tensor] only supports 1D insertion of elements not along the full length of the axis\n" | |||
| : ""; | |||
| err_msg += | |||
| (ind.at(0) + tensor->shape().NumOfElements() > shape().NumOfElements()) ? "[Tensor] incorrect index\n" : ""; | |||
| } else { | |||
| err_msg += (ind.size() + tensor->Rank() != Rank()) ? "[Tensor] incorrect index\n" : ""; | |||
| } | |||
| err_msg += (type() == DataType::DE_STRING) ? "[Tensor] Cannot insert into a tensor of type string\n" : ""; | |||
| err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; | |||
| err_msg += tensor->type().SizeInBytes() != type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; | |||
| uchar *start_addr_of_ind = nullptr; | |||
| TensorShape remaining_shape = TensorShape::CreateUnknownRankShape(); | |||
| err_msg += (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : ""; | |||
| err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : ""; | |||
| if (partial_insert) { | |||
| TensorShape remaining_shape = tensor->shape(); | |||
| err_msg += | |||
| (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : ""; | |||
| } else { | |||
| TensorShape remaining_shape = TensorShape::CreateUnknownRankShape(); | |||
| err_msg += | |||
| (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : ""; | |||
| err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : ""; | |||
| } | |||
| if (!err_msg.empty()) { | |||
| MS_LOG(DEBUG) << "Insert tensor message: " << err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| @@ -556,39 +574,6 @@ Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_p | |||
| } | |||
| } | |||
| Status Tensor::Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &tensor) { | |||
| std::string err_msg; | |||
| err_msg += (index.size() != 1) ? "[Tensor] only supports 1d concatenation \n" : ""; | |||
| err_msg += (type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : ""; | |||
| err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; | |||
| err_msg += | |||
| (index.at(0) + tensor->shape().NumOfElements() > this->shape().NumOfElements()) ? "[Tensor] incorrect index\n" : ""; | |||
| err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; | |||
| uchar *start_addr_of_ind = nullptr; | |||
| TensorShape remaining_shape = tensor->shape(); | |||
| StartAddrOfIndex(index, &start_addr_of_ind, &remaining_shape); | |||
| err_msg += (start_addr_of_ind == nullptr) ? "Failed to create memory for Tensor.\n" : ""; | |||
| if (!err_msg.empty()) { | |||
| MS_LOG(DEBUG) << "Insert tensor message: " << err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } else { | |||
| int ret_code = | |||
| memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes()); | |||
| if (ret_code == 0) { | |||
| return Status::OK(); | |||
| } else { | |||
| err_msg += "[Tensor] error in memcpy_s when inserting tensor\n"; | |||
| MS_LOG(DEBUG) << "Tensor message: " << err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| } | |||
| } | |||
| Status Tensor::ExpandDim(const dsize_t &axis) { | |||
| if (axis > Rank()) { | |||
| std::string err = "Axis is out of bound"; | |||
| @@ -330,8 +330,10 @@ class Tensor { | |||
| /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. | |||
| /// \param index | |||
| /// \param input | |||
| /// \param partial_insert: if true, input may fill only part of the axis (requires a 1D index); if false, input must match the full remaining shape at index | |||
| /// \return Status code | |||
| Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); | |||
| Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input, | |||
| const bool partial_insert = false); | |||
| /// Find the address of the given index. Used in InsertTensor. | |||
| /// Example: | |||
| @@ -393,9 +395,6 @@ class Tensor { | |||
| static Status GetBufferInfo(Tensor *t, py::buffer_info *out); | |||
| #endif | |||
| /// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor | |||
| Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); | |||
| /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor | |||
| /// The order of elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6 | |||
| /// \tparam T type of values in the Tensor Iterator | |||
| @@ -330,8 +330,10 @@ class Tensor { | |||
| /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. | |||
| /// \param index | |||
| /// \param input | |||
| /// \param partial_insert: if true, input may fill only part of the axis (requires a 1D index); if false, input must match the full remaining shape at index | |||
| /// \return Status code | |||
| Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); | |||
| Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input, | |||
| const bool partial_insert = false); | |||
| /// Find the address of the given index. Used in InsertTensor. | |||
| /// Example: | |||
| @@ -393,9 +395,6 @@ class Tensor { | |||
| static Status GetBufferInfo(Tensor *t, py::buffer_info *out); | |||
| #endif | |||
| /// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor | |||
| Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); | |||
| /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor | |||
| /// The order of elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6 | |||
| /// \tparam T type of values in the Tensor Iterator | |||
| @@ -580,77 +580,73 @@ Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu | |||
| Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr<Tensor> prepend, | |||
| std::shared_ptr<Tensor> append) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Rank() == 1, "Only 1D tensors supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(axis == 0 || axis == -1, "Only concatenation along the last dimension supported"); | |||
| axis = Tensor::HandleNeg(axis, input[0]->shape().Rank()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(axis == 0, "Only axis=0 is supported"); | |||
| std::shared_ptr<Tensor> out; | |||
| TensorShape t = TensorShape::CreateScalar(); | |||
| DataType first_dtype = input[0]->type(); | |||
| TensorRow tensor_list; | |||
| if (prepend != nullptr) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == prepend->type(), "Tensor types do not match"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(prepend->shape().Rank() == 1, "Only 1D tensors supported"); | |||
| RETURN_IF_NOT_OK(ConcatenateHelper(prepend, &out, axis, input[0])); | |||
| } else { | |||
| out = input[0]; | |||
| tensor_list.emplace_back(prepend); | |||
| } | |||
| for (dsize_t i = 1; i < input.size(); i++) { | |||
| std::shared_ptr<Tensor> out_t; | |||
| for (dsize_t i = 0; i < input.size(); i++) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == input[i]->type(), "Tensor types do not match"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input[i]->shape().Rank() == 1, "Only 1D tensors supported"); | |||
| RETURN_IF_NOT_OK(ConcatenateHelper(out, &out_t, axis, input[i])); | |||
| out = out_t; | |||
| tensor_list.emplace_back(input[i]); | |||
| } | |||
| std::shared_ptr<Tensor> out_t; | |||
| if (append != nullptr) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == append->type(), "Tensor types do not match"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(append->shape().Rank() == 1, "Only 1D tensors supported"); | |||
| RETURN_IF_NOT_OK(ConcatenateHelper(out, &out_t, axis, append)); | |||
| } else { | |||
| out_t = out; | |||
| tensor_list.emplace_back(append); | |||
| } | |||
| output->push_back(out_t); | |||
| return Status::OK(); | |||
| } | |||
| Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int8_t axis, | |||
| std::shared_ptr<Tensor> append) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == append->type(), "Tensor types do not match"); | |||
| TensorShape t({}); | |||
| for (dsize_t i = 0; i < input->shape().Rank(); i++) { | |||
| // create final shape | |||
| for (dsize_t i = 0; i < tensor_list[0]->shape().Rank(); i++) { | |||
| if (i != axis) { | |||
| t = t.AppendDim(input->shape()[i]); | |||
| t = t.AppendDim(tensor_list[0]->shape()[i]); | |||
| } else { | |||
| dsize_t new_shape = input->shape()[i] + append->shape()[i]; | |||
| dsize_t new_shape = 0; | |||
| for (dsize_t j = 0; j < tensor_list.size(); j++) { | |||
| new_shape = tensor_list[j]->shape()[i] + new_shape; | |||
| } | |||
| t = t.AppendDim(new_shape); | |||
| } | |||
| } | |||
| std::shared_ptr<Tensor> out; | |||
| if (input->type().IsNumeric()) { | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(t, input->type(), &out)); | |||
| if (input[0]->type().IsNumeric()) { | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(t, tensor_list[0]->type(), &out)); | |||
| std::vector<dsize_t> index(axis + 1, 0); | |||
| RETURN_IF_NOT_OK(out->Concatenate({0}, input)); | |||
| RETURN_IF_NOT_OK(out->Concatenate({input->shape()[0]}, append)); | |||
| *output = out; | |||
| int n = index.size() - 1; | |||
| for (dsize_t i = 0; i < tensor_list.size(); i++) { | |||
| RETURN_IF_NOT_OK(out->InsertTensor({index}, tensor_list[i], true)); | |||
| index[n] = index[n] + tensor_list[i]->shape()[axis]; | |||
| } | |||
| } else { | |||
| std::vector<std::string> strings; | |||
| auto itr = input->begin<std::string_view>(); | |||
| for (; itr != input->end<std::string_view>(); itr++) { | |||
| strings.emplace_back(*itr); | |||
| } | |||
| itr = append->begin<std::string_view>(); | |||
| for (; itr != append->end<std::string_view>(); itr++) { | |||
| strings.emplace_back(*itr); | |||
| for (dsize_t i = 0; i < tensor_list.size(); i++) { | |||
| auto itr = tensor_list[i]->begin<std::string_view>(); | |||
| for (; itr != tensor_list[i]->end<std::string_view>(); itr++) { | |||
| strings.emplace_back(*itr); | |||
| } | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, t, &out)); | |||
| *output = out; | |||
| } | |||
| output->push_back(out); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -152,11 +152,6 @@ Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu | |||
| Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr<Tensor> prepend, | |||
| std::shared_ptr<Tensor> append); | |||
| // helper for concat, always append to the input, and pass that to the output | |||
| Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int8_t axis, | |||
| std::shared_ptr<Tensor> append); | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -28,9 +28,8 @@ class MindDataTestConcatenateOp : public UT::Common { | |||
| }; | |||
| TEST_F(MindDataTestConcatenateOp, TestOp) { | |||
| MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp."; | |||
| MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp-SingleRowinput."; | |||
| std::vector<uint64_t> labels = {1, 1, 2}; | |||
| TensorShape shape({3}); | |||
| std::shared_ptr<Tensor> input; | |||
| Tensor::CreateFromVector(labels, &input); | |||
| @@ -57,3 +56,71 @@ TEST_F(MindDataTestConcatenateOp, TestOp) { | |||
| MS_LOG(DEBUG) << *expected << std::endl; | |||
| ASSERT_TRUE(*output == *expected); | |||
| } | |||
| TEST_F(MindDataTestConcatenateOp, TestOp2) { | |||
| MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp2-MultiInput."; | |||
| std::vector<uint64_t> labels = {1, 12, 2}; | |||
| std::shared_ptr<Tensor> row_1; | |||
| Tensor::CreateFromVector(labels, &row_1); | |||
| std::shared_ptr<Tensor> row_2; | |||
| Tensor::CreateFromVector(labels, &row_2); | |||
| std::vector<uint64_t> append_labels = {4, 4, 4}; | |||
| std::shared_ptr<Tensor> append; | |||
| Tensor::CreateFromVector(append_labels, &append); | |||
| TensorRow tensor_list; | |||
| tensor_list.push_back(row_1); | |||
| tensor_list.push_back(row_2); | |||
| std::shared_ptr<Tensor> output; | |||
| std::unique_ptr<ConcatenateOp> op(new ConcatenateOp(0, nullptr, append)); | |||
| TensorRow out_row; | |||
| Status s = op->Compute(tensor_list, &out_row); | |||
| std::vector<uint64_t> out = {1, 12, 2, 1, 12, 2, 4, 4, 4}; | |||
| std::shared_ptr<Tensor> expected; | |||
| Tensor::CreateFromVector(out, &expected); | |||
| output = out_row[0]; | |||
| EXPECT_TRUE(s.IsOk()); | |||
| ASSERT_TRUE(output->shape() == expected->shape()); | |||
| ASSERT_TRUE(output->type() == expected->type()); | |||
| MS_LOG(DEBUG) << *output << std::endl; | |||
| MS_LOG(DEBUG) << *expected << std::endl; | |||
| ASSERT_TRUE(*output == *expected); | |||
| } | |||
| TEST_F(MindDataTestConcatenateOp, TestOp3) { | |||
| MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp3-Strings."; | |||
| std::vector<std::string> labels = {"hello", "bye"}; | |||
| std::shared_ptr<Tensor> row_1; | |||
| Tensor::CreateFromVector(labels, &row_1); | |||
| std::vector<std::string> append_labels = {"1", "2", "3"}; | |||
| std::shared_ptr<Tensor> append; | |||
| Tensor::CreateFromVector(append_labels, &append); | |||
| TensorRow tensor_list; | |||
| tensor_list.push_back(row_1); | |||
| std::shared_ptr<Tensor> output; | |||
| std::unique_ptr<ConcatenateOp> op(new ConcatenateOp(0, nullptr, append)); | |||
| TensorRow out_row; | |||
| Status s = op->Compute(tensor_list, &out_row); | |||
| std::vector<std::string> out = {"hello", "bye", "1", "2", "3"}; | |||
| std::shared_ptr<Tensor> expected; | |||
| Tensor::CreateFromVector(out, &expected); | |||
| output = out_row[0]; | |||
| EXPECT_TRUE(s.IsOk()); | |||
| ASSERT_TRUE(output->shape() == expected->shape()); | |||
| ASSERT_TRUE(output->type() == expected->type()); | |||
| MS_LOG(DEBUG) << *output << std::endl; | |||
| MS_LOG(DEBUG) << *expected << std::endl; | |||
| ASSERT_TRUE(*output == *expected); | |||
| } | |||
| @@ -432,7 +432,7 @@ TEST_F(MindDataTestTensorDE, TensorSlice) { | |||
| ASSERT_EQ(*t2, *t); | |||
| } | |||
| TEST_F(MindDataTestTensorDE, TensorConcatenate) { | |||
| TEST_F(MindDataTestTensorDE, TensorPartialInsert) { | |||
| std::vector<uint32_t> values1 = {1, 2, 3, 0, 0, 0}; | |||
| std::vector<uint32_t> values2 = {4, 5, 6}; | |||
| std::vector<uint32_t> expected = {1, 2, 3, 4, 5, 6}; | |||
| @@ -445,7 +445,7 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) { | |||
| std::shared_ptr<Tensor> out; | |||
| Tensor::CreateFromVector(expected, &out); | |||
| Status s = t1->Concatenate({3}, t2); | |||
| Status s = t1->InsertTensor({3}, t2, true); | |||
| EXPECT_TRUE(s.IsOk()); | |||
| auto i = out->begin<uint32_t>(); | |||
| @@ -455,7 +455,7 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) { | |||
| } | |||
| // should fail if the inserted tensor would run past the end of the destination tensor | |||
| s = t1->Concatenate({5}, t2); | |||
| s = t1->InsertTensor({5}, t2, true); | |||
| EXPECT_FALSE(s.IsOk()); | |||
| } | |||
| @@ -130,7 +130,7 @@ def test_concatenate_op_incorrect_dim(): | |||
| def gen(): | |||
| yield (np.array([["ss", "ad"], ["ss", "ad"]], dtype='S'),) | |||
| prepend_tensor = np.array([3, 5], dtype=np.float) | |||
| prepend_tensor = np.array(["ss", "ss"], dtype='S') | |||
| concatenate_op = data_trans.Concatenate(0, prepend_tensor) | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||