Browse Source

Disable getter pass

tags/v1.1.0
Eric 5 years ago
parent
commit
db2a8b5e1d
14 changed files with 37 additions and 43 deletions
  1. +1
    -0
      mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc
  2. +1
    -9
      mindspore/ccsrc/minddata/dataset/engine/opt/pre/getter_pass.cc
  3. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc
  4. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc
  5. +2
    -2
      mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc
  6. +2
    -2
      mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc
  7. +7
    -7
      mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc
  8. +5
    -4
      mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc
  9. +6
    -6
      mindspore/ccsrc/minddata/dataset/text/kernels/sentence_piece_tokenizer_op.cc
  10. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc
  11. +2
    -2
      mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc
  12. +2
    -2
      mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc
  13. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc
  14. +5
    -5
      tests/ut/cpp/dataset/optimization_pass_test.cc

+ 1
- 0
mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc View File

@@ -323,6 +323,7 @@ Status DatasetOp::GetNumClasses(int64_t *num_classes) {
return child_[child_.size() - 1]->GetNumClasses(num_classes);
} else {
// when num classes isn't found, the default behavior is to return -1
MS_LOG(WARNING) << "Num classes not defined for : " << Name();
*num_classes = -1;
return Status::OK();
}


+ 1
- 9
mindspore/ccsrc/minddata/dataset/engine/opt/pre/getter_pass.cc View File

@@ -54,15 +54,7 @@ Status GetterPass::GetterNodes::RunOnNode(std::shared_ptr<FilterOp> node, bool *
Status GetterPass::RunOnTree(ExecutionTree *tree, bool *modified) {
RETURN_IF_NOT_OK(pass_.Run(tree, modified));

// nested private class variables can be directly accessed by its outer class
for (auto node : pass_.nodes_to_remove_) {
DatasetOp *parent;
node->Parent(&parent, 0);
// only remove node whose is a single child of its parent
if (parent != nullptr && parent->Children().size() == 1) {
RETURN_IF_NOT_OK(node->Remove());
}
}
// currently the getter pass does not remove any node; it only clears callbacks on the selected ops

// clear the callback for selected ops (map when its GetOutputType/Shape)
for (auto node : pass_.nodes_to_clear_callback_) node->ClearCallbacks();


+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc View File

@@ -131,7 +131,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text
Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr<Tensor> &input,
std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
std::vector<std::string> strs(input->Size());
int i = 0;
for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {


+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc View File

@@ -29,7 +29,7 @@ namespace dataset {

Status CaseFoldOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
icu::ErrorCode error;
const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCCasefoldInstance failed.");


+ 2
- 2
mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc View File

@@ -33,11 +33,11 @@ JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::strin

Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
RETURN_UNEXPECTED_IF_NULL(jieba_parser_);

if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor.");
}

std::string_view sentence_v;


+ 2
- 2
mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc View File

@@ -35,7 +35,7 @@ NgramOp::NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, int32_t r_le

Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor.");
std::vector<int32_t> offsets; // offsets for each str
std::vector<std::string> res; // holds the result of ngrams
std::string str_buffer; // concat all pad tokens with string interleaved with separators
@@ -60,7 +60,7 @@ Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
if (end_ind - start_ind <= n) {
res.emplace_back(std::string()); // push back empty string
} else {
CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition");
CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition.");

for (int i = start_ind; i < end_ind - n; i++) {
res.emplace_back(str_buffer.substr(offsets[i], offsets[i + n] - offsets[i] - separator_.size()));


+ 7
- 7
mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc View File

@@ -29,7 +29,7 @@ namespace dataset {
const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc;
Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");

icu::ErrorCode error;
const icu::Normalizer2 *normalize = nullptr;
@@ -40,26 +40,26 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
}
case NormalizeForm::kNfc: {
normalize = icu::Normalizer2::getNFCInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed.");
break;
}
case NormalizeForm::kNfkc: {
normalize = icu::Normalizer2::getNFKCInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed.");
break;
}
case NormalizeForm::kNfd: {
normalize = icu::Normalizer2::getNFDInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed.");
break;
}
case NormalizeForm::kNfkd: {
normalize = icu::Normalizer2::getNFKDInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed.");
break;
}
default: {
RETURN_STATUS_UNEXPECTED("unexpected normalize form");
RETURN_STATUS_UNEXPECTED("Unexpected normalize form.");
break;
}
}
@@ -68,7 +68,7 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {
icu::StringByteSink<std::string> sink(&strs[i++]);
normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed.");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8 failed.");
}
return Tensor::CreateFromVector(strs, input->shape(), output);
}


+ 5
- 4
mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc View File

@@ -25,7 +25,7 @@ namespace dataset {

Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text,
std::string *out) const {
CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null");
CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null.");
UErrorCode icu_error = U_ZERO_ERROR;
icu::UnicodeString unicode_text = icu::UnicodeString::fromUTF8(text);
matcher->reset(unicode_text);
@@ -35,17 +35,18 @@ Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std:
} else {
unicode_out = matcher->replaceFirst(replace_, icu_error);
}
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed");
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed.");
unicode_out.toUTF8String(*out);
return Status::OK();
}

Status RegexReplaceOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
UErrorCode icu_error = U_ZERO_ERROR;
icu::RegexMatcher matcher(pattern_, 0, icu_error);
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "Create icu RegexMatcher failed, you may input one error pattern");
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error),
"Create icu RegexMatcher failed, you may input one error pattern.");
std::vector<std::string> strs(input->Size());
int i = 0;
for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {


+ 6
- 6
mindspore/ccsrc/minddata/dataset/text/kernels/sentence_piece_tokenizer_op.cc View File

@@ -56,7 +56,7 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
}

if (input->Rank() != 0 || input->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("Input tensor should be scalar string tensor.");
}

std::string_view sentence_v;
@@ -67,14 +67,14 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
std::vector<std::string> pieces;
auto status = processor_.Encode(sentence, &pieces);
if (!status.ok()) {
RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error.");
}
RETURN_IF_NOT_OK(Tensor::CreateFromVector(pieces, output));
} else {
std::vector<int> ids;
auto status = processor_.Encode(sentence, &ids);
if (!status.ok()) {
RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error.");
}
RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output));
}
@@ -84,15 +84,15 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
Status SentencePieceTokenizerOp::GetModelRealPath(const std::string &model_path, const std::string &filename) {
char real_path[PATH_MAX] = {0};
if (file_path_.size() >= PATH_MAX) {
RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
}
#if defined(_WIN32) || defined(_WIN64)
if (_fullpath(real_path, common::SafeCStr(model_path), PATH_MAX) == nullptr) {
RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
}
#else
if (realpath(common::SafeCStr(model_path), real_path) == nullptr) {
RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
}
#endif
std::string abs_path = real_path;


+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc View File

@@ -29,7 +29,7 @@ Status TruncateSequencePairOp::Compute(const TensorRow &input, TensorRow *output
std::shared_ptr<Tensor> seq1 = input[0];
std::shared_ptr<Tensor> seq2 = input[1];
CHECK_FAIL_RETURN_UNEXPECTED(seq1->shape().Rank() == 1 && seq2->shape().Rank() == 1,
"Both sequences should be of rank 1");
"Both sequences should be of rank 1.");
dsize_t length1 = seq1->shape()[0];
dsize_t length2 = seq2->shape()[0];
dsize_t outLength1 = length1;


+ 2
- 2
mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc View File

@@ -31,9 +31,9 @@ const bool UnicodeCharTokenizerOp::kDefWithOffsets = false;

Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor.");
}
std::string_view str;
RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {}));


+ 2
- 2
mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc View File

@@ -35,9 +35,9 @@ const bool WhitespaceTokenizerOp::kDefWithOffsets = false;

Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor.");
}
std::string_view str;
RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {}));


+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc View File

@@ -117,7 +117,7 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uin
Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor");
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor.");
}
dsize_t count = 0;
std::vector<std::string> out_tokens;


+ 5
- 5
tests/ut/cpp/dataset/optimization_pass_test.cc View File

@@ -95,9 +95,9 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestOutputShapeAndTypePass) {
// +- ( 4) <RandomDataOp>: [workers: 4] [total rows: 44]
//

// verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not
EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos);
EXPECT_EQ(ss_str.find("RepeatOp"), ss_str.npos);
// verify that no ops are removed: ShuffleOp, RepeatOp, ProjectOp and BatchOp must all still be present
EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos);
EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos);
EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos);
EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos);
}
@@ -129,8 +129,8 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestDatasetSizePass) {
exe_tree->Print(ss);
std::string ss_str = ss.str();

// verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not
EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos);
// verify that no ops are removed: ShuffleOp, RepeatOp, ProjectOp and BatchOp must all still be present
EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos);
EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos);
EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos);
EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos);


Loading…
Cancel
Save