Merge pull request !2188 from JesseKLee/deadcodetags/v0.5.0-beta
| @@ -408,8 +408,7 @@ Status ClueOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) { | |||
| break; | |||
| } | |||
| } | |||
| auto file_it = filename_index_->Search(*it); | |||
| file_index.emplace_back(std::pair<std::string, int64_t>(file_it.value(), *it)); | |||
| file_index.emplace_back(std::pair<std::string, int64_t>((*filename_index_)[*it], *it)); | |||
| } | |||
| } else { | |||
| for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) { | |||
| @@ -72,8 +72,9 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj< | |||
| RETURN_IF_NOT_OK(IOBlock::GetKey(&fetched_key)); | |||
| // Do an index lookup using that key to get the filename. | |||
| auto it = index.Search(fetched_key); | |||
| if (it != index.end()) { | |||
| auto r = index.Search(fetched_key); | |||
| if (r.second) { | |||
| auto &it = r.first; | |||
| *out_filename = it.value(); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Could not find filename from index"); | |||
| @@ -314,8 +314,7 @@ Status TextFileOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) { | |||
| break; | |||
| } | |||
| } | |||
| auto file_it = filename_index_->Search(*it); | |||
| file_index.emplace_back(std::pair<std::string, int64_t>(file_it.value(), *it)); | |||
| file_index.emplace_back(std::pair<std::string, int64_t>((*filename_index_)[*it], *it)); | |||
| } | |||
| } else { | |||
| for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) { | |||
| @@ -451,8 +451,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector<int64_t> &i_keys) { | |||
| } | |||
| } else { | |||
| // Do an index lookup using that key to get the filename. | |||
| auto file_it = filename_index_->Search(*it); | |||
| std::string file_name = file_it.value(); | |||
| std::string file_name = (*filename_index_)[*it]; | |||
| if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { | |||
| auto ioBlock = std::make_unique<FilenameBlock>(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); | |||
| RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); | |||
| @@ -40,8 +40,6 @@ struct BPlusTreeTraits { | |||
| static constexpr slot_type kLeafSlots = 256; | |||
| // Number of slots in each inner node of the tree | |||
| static constexpr slot_type kInnerSlots = 128; | |||
| // If kAppendMode is true, we will split high instead of 50/50 split | |||
| static constexpr bool kAppendMode = false; | |||
| }; | |||
| /// Implementation of B+ tree | |||
| @@ -123,19 +121,14 @@ class BPlusTree { | |||
| std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value); | |||
| std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value); | |||
| void PopulateNumKeys(); | |||
| key_type KeyAtPos(uint64_t inx); | |||
| // Statistics | |||
| struct tree_stats { | |||
| std::atomic<uint64_t> size_; | |||
| uint32_t leaves_; | |||
| uint32_t inner_nodes_; | |||
| uint32_t level_; | |||
| bool num_keys_array_valid_; | |||
| tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0), num_keys_array_valid_(false) {} | |||
| tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0) {} | |||
| }; | |||
| private: | |||
| @@ -160,10 +153,6 @@ class BPlusTree { | |||
| Node<BaseNode> lru_; | |||
| }; | |||
| uint64_t PopulateNumKeys(BaseNode *n); | |||
| key_type KeyAtPos(BaseNode *n, uint64_t inx); | |||
| // This control block keeps track of all the nodes we traverse on insert. | |||
| // To maximize concurrency, internal nodes are latched S. If a node split | |||
| // is required, we must release all the latches and redo it again and change | |||
| @@ -255,7 +244,6 @@ class BPlusTree { | |||
| slot_type slot_dir_[traits::kInnerSlots] = {0}; | |||
| key_type keys_[traits::kInnerSlots] = {0}; | |||
| BaseNode *data_[traits::kInnerSlots + 1] = {nullptr}; | |||
| uint64_t num_keys_[traits::kInnerSlots + 1] = {0}; | |||
| slot_type slotuse_; | |||
| }; | |||
| @@ -391,7 +379,6 @@ class BPlusTree { | |||
| Iterator operator--(int); | |||
| bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); } | |||
| bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); } | |||
| private: | |||
| @@ -441,7 +428,6 @@ class BPlusTree { | |||
| ConstIterator operator--(int); | |||
| bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); } | |||
| bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); } | |||
| private: | |||
| @@ -451,20 +437,17 @@ class BPlusTree { | |||
| }; | |||
| Iterator begin(); | |||
| Iterator end(); | |||
| ConstIterator begin() const; | |||
| ConstIterator end() const; | |||
| ConstIterator cbegin() const; | |||
| ConstIterator cend() const; | |||
| // Locate the entry with key | |||
| ConstIterator Search(const key_type &key) const; | |||
| Iterator Search(const key_type &key); | |||
| std::pair<ConstIterator, bool> Search(const key_type &key) const; | |||
| std::pair<Iterator, bool> Search(const key_type &key); | |||
| value_type operator[](key_type key); | |||
| }; | |||
| @@ -269,26 +269,17 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::LeafInsertK | |||
| RETURN_IF_BAD_RC(rc); | |||
| leaf_nodes_.InsertAfter(node, new_leaf); | |||
| *split_node = new_leaf; | |||
| if (slot == node->slotuse_ && traits::kAppendMode) { | |||
| // Split high. Good for bulk load when keys are inserted in ascending order | |||
| *split_key = key; | |||
| // Just insert the new key to the new leaf. No further need to move the keys | |||
| // from one leaf to the other. | |||
| rc = new_leaf->InsertIntoSlot(nullptr, 0, key, std::move(value)); | |||
| // 50/50 split | |||
| rc = node->Split(new_leaf); | |||
| RETURN_IF_BAD_RC(rc); | |||
| *split_key = new_leaf->keys_[0]; | |||
| if (LessThan(key, *split_key)) { | |||
| rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value)); | |||
| RETURN_IF_BAD_RC(rc); | |||
| } else { | |||
| // 50/50 split | |||
| rc = node->Split(new_leaf); | |||
| slot -= node->slotuse_; | |||
| rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value)); | |||
| RETURN_IF_BAD_RC(rc); | |||
| *split_key = new_leaf->keys_[0]; | |||
| if (LessThan(key, *split_key)) { | |||
| rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value)); | |||
| RETURN_IF_BAD_RC(rc); | |||
| } else { | |||
| slot -= node->slotuse_; | |||
| rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value)); | |||
| RETURN_IF_BAD_RC(rc); | |||
| } | |||
| } | |||
| } | |||
| return rc; | |||
| @@ -309,25 +300,18 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InnerInsert | |||
| rc = AllocateInner(&new_inner); | |||
| RETURN_IF_BAD_RC(rc); | |||
| *split_node = new_inner; | |||
| if (slot == node->slotuse_ && traits::kAppendMode) { | |||
| *split_key = key; | |||
| new_inner->data_[0] = node->data_[node->slotuse_]; | |||
| rc = new_inner->InsertIntoSlot(0, key, ptr); | |||
| rc = node->Split(new_inner, split_key); | |||
| RETURN_IF_BAD_RC(rc); | |||
| if (LessThan(key, *split_key)) { | |||
| // Need to readjust the slot position since the split key is no longer in the two children. | |||
| slot = FindSlot(node, key); | |||
| rc = node->InsertIntoSlot(slot, key, ptr); | |||
| RETURN_IF_BAD_RC(rc); | |||
| } else { | |||
| rc = node->Split(new_inner, split_key); | |||
| // Same reasoning as above | |||
| slot = FindSlot(new_inner, key); | |||
| rc = new_inner->InsertIntoSlot(slot, key, ptr); | |||
| RETURN_IF_BAD_RC(rc); | |||
| if (LessThan(key, *split_key)) { | |||
| // Need to readjust the slot position since the split key is no longer in the two children. | |||
| slot = FindSlot(node, key); | |||
| rc = node->InsertIntoSlot(slot, key, ptr); | |||
| RETURN_IF_BAD_RC(rc); | |||
| } else { | |||
| // Same reasoning as above | |||
| slot = FindSlot(new_inner, key); | |||
| rc = new_inner->InsertIntoSlot(slot, key, ptr); | |||
| RETURN_IF_BAD_RC(rc); | |||
| } | |||
| } | |||
| } | |||
| return rc; | |||
| @@ -377,8 +361,7 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InsertKeyVa | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::Locate(RWLock *parent_lock, | |||
| bool forUpdate, | |||
| typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::Locate(RWLock *parent_lock, bool forUpdate, | |||
| BPlusTree<K, V, A, C, T>::BaseNode *top, | |||
| const key_type &key, | |||
| BPlusTree<K, V, A, C, T>::LeafNode **ln, | |||
| @@ -481,9 +464,6 @@ Status BPlusTree<K, V, A, C, T>::DoInsert(const key_type &key, std::unique_ptr<v | |||
| do { | |||
| // Track all the paths to the target and lock each internal node in S. | |||
| LockPathCB InsCB(this, retry); | |||
| // Mark the numKeysArray invalid. We may latch the tree in S and multiple guys are doing insert. | |||
| // But it is okay as we all set the same value. | |||
| stats_.num_keys_array_valid_ = false; | |||
| // Initially we lock path in S unless we need to do node split. | |||
| retry = false; | |||
| BaseNode *new_child = nullptr; | |||
| @@ -552,70 +532,6 @@ std::unique_ptr<V> BPlusTree<K, V, A, C, T>::DoUpdate(const key_type &key, std:: | |||
| } | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| void BPlusTree<K, V, A, C, T>::PopulateNumKeys() { | |||
| // Starting from the root, compute how many keys are reachable through each child pointer of every inner node. | |||
| // The counts are stored in the num_keys_ array of each inner node; the total returned for the root is discarded. | |||
| (void)PopulateNumKeys(root_); | |||
| // Mark the counts as valid. No lock is taken here; KeyAtPos calls this while holding rw_lock_ exclusively. | |||
| stats_.num_keys_array_valid_ = true; | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| uint64_t BPlusTree<K, V, A, C, T>::PopulateNumKeys(BPlusTree<K, V, A, C, T>::BaseNode *n) { | |||
| if (n->is_leafnode()) { | |||
| auto *leaf = static_cast<LeafNode *>(n); | |||
| return leaf->slotuse_; | |||
| } else { | |||
| auto *inner = static_cast<InnerNode *>(n); | |||
| uint64_t num_keys = 0; | |||
| for (auto i = 0; i < inner->slotuse_ + 1; i++) { | |||
| inner->num_keys_[i] = PopulateNumKeys(inner->data_[i]); | |||
| num_keys += inner->num_keys_[i]; | |||
| } | |||
| return num_keys; | |||
| } | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| typename BPlusTree<K, V, A, C, T>::key_type BPlusTree<K, V, A, C, T>::KeyAtPos(uint64_t inx) { | |||
| if (stats_.num_keys_array_valid_ == false) { | |||
| // Populating the counts needs exclusive access to the tree; with concurrent inserts the numbers would not be accurate. | |||
| UniqueLock lck(&rw_lock_); | |||
| // Check again now that the lock is held. | |||
| if (stats_.num_keys_array_valid_ == false) { | |||
| PopulateNumKeys(); | |||
| } | |||
| } | |||
| // Use the per-child key counts to descend from the root to the key at position inx. NOTE(review): lck is scoped to the if-block above, so this traversal runs without rw_lock_ held -- confirm that is intended. | |||
| return KeyAtPos(root_, inx); | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| typename BPlusTree<K, V, A, C, T>::key_type BPlusTree<K, V, A, C, T>::KeyAtPos(BPlusTree<K, V, A, C, T>::BaseNode *n, | |||
| uint64_t inx) { | |||
| if (n->is_leafnode()) { | |||
| auto *leaf = static_cast<LeafNode *>(n); | |||
| return leaf->keys_[leaf->slot_dir_[inx]]; | |||
| } else { | |||
| auto *inner = static_cast<InnerNode *>(n); | |||
| if ((inx + 1) > inner->num_keys_[0]) { | |||
| inx -= inner->num_keys_[0]; | |||
| } else { | |||
| return KeyAtPos(inner->data_[0], inx); | |||
| } | |||
| for (auto i = 0; i < inner->slotuse_; i++) { | |||
| if ((inx + 1) > inner->num_keys_[inner->slot_dir_[i] + 1]) { | |||
| inx -= inner->num_keys_[inner->slot_dir_[i] + 1]; | |||
| } else { | |||
| return KeyAtPos(inner->data_[inner->slot_dir_[i] + 1], inx); | |||
| } | |||
| } | |||
| } | |||
| // Reached when n is an inner node and inx is beyond the keys counted under it. Instead of throwing an exception, | |||
| // return a default-constructed key_type. | |||
| return key_type(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -286,7 +286,8 @@ typename BPlusTree<K, V, A, C, T>::ConstIterator &BPlusTree<K, V, A, C, T>::Cons | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| typename BPlusTree<K, V, A, C, T>::ConstIterator BPlusTree<K, V, A, C, T>::Search(const key_type &key) const { | |||
| std::pair<typename BPlusTree<K, V, A, C, T>::ConstIterator, bool> BPlusTree<K, V, A, C, T>::Search( | |||
| const key_type &key) const { | |||
| if (root_ != nullptr) { | |||
| LeafNode *leaf = nullptr; | |||
| slot_type slot; | |||
| @@ -294,21 +295,15 @@ typename BPlusTree<K, V, A, C, T>::ConstIterator BPlusTree<K, V, A, C, T>::Searc | |||
| // Lock the tree in S, pass the lock to Locate which will unlock it for us underneath. | |||
| myLock->LockShared(); | |||
| IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot); | |||
| if (rc == IndexRc::kOk) { | |||
| // All locks from the tree to the parent of leaf are all gone. We still have a S lock | |||
| // on the leaf. The unlock will be handled by the iterator when it goes out of scope. | |||
| return ConstIterator(leaf, slot, true); | |||
| } else { | |||
| MS_LOG(DEBUG) << "Key not found. rc = " << static_cast<int>(rc) << "."; | |||
| return cend(); | |||
| } | |||
| bool find = (rc == IndexRc::kOk); | |||
| return std::make_pair(ConstIterator(leaf, slot, find), find); | |||
| } else { | |||
| return cend(); | |||
| return std::make_pair(cend(), false); | |||
| } | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| typename BPlusTree<K, V, A, C, T>::Iterator BPlusTree<K, V, A, C, T>::Search(const key_type &key) { | |||
| std::pair<typename BPlusTree<K, V, A, C, T>::Iterator, bool> BPlusTree<K, V, A, C, T>::Search(const key_type &key) { | |||
| if (root_ != nullptr) { | |||
| LeafNode *leaf = nullptr; | |||
| slot_type slot; | |||
| @@ -316,23 +311,17 @@ typename BPlusTree<K, V, A, C, T>::Iterator BPlusTree<K, V, A, C, T>::Search(con | |||
| // Lock the tree in S, pass the lock to Locate which will unlock it for us underneath. | |||
| myLock->LockShared(); | |||
| IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot); | |||
| if (rc == IndexRc::kOk) { | |||
| // All locks from the tree to the parent of leaf are all gone. We still have a S lock | |||
| // on the leaf. The unlock will be handled by the iterator when it goes out of scope. | |||
| return Iterator(leaf, slot, true); | |||
| } else { | |||
| MS_LOG(DEBUG) << "Key not found. rc = " << static_cast<int>(rc) << "."; | |||
| return end(); | |||
| } | |||
| bool find = (rc == IndexRc::kOk); | |||
| return std::make_pair(Iterator(leaf, slot, find), find); | |||
| } else { | |||
| return end(); | |||
| return std::make_pair(end(), false); | |||
| } | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| typename BPlusTree<K, V, A, C, T>::value_type BPlusTree<K, V, A, C, T>::operator[](key_type key) { | |||
| Iterator it = Search(key); | |||
| return it.value(); | |||
| auto r = Search(key); | |||
| return r.first.value(); | |||
| } | |||
| template <typename K, typename V, typename A, typename C, typename T> | |||
| @@ -32,13 +32,8 @@ using mindspore::LogStream; | |||
| // Test-only traits: keep the branching factor very low (6 leaf slots, 3 inner slots). | |||
| struct mytraits { | |||
| using slot_type = uint16_t; | |||
| static const slot_type kLeafSlots = 6; | |||
| static const slot_type kInnerSlots = 3; | |||
| static const bool kAppendMode = false; | |||
| }; | |||
| @@ -95,13 +90,14 @@ TEST_F(MindDataTestBPlusTree, Test1) { | |||
| // Test search | |||
| { | |||
| MS_LOG(INFO) << "Locate key " << 100 << " Expect found."; | |||
| auto it = btree.Search(100); | |||
| EXPECT_FALSE(it == btree.end()); | |||
| auto r = btree.Search(100); | |||
| auto &it = r.first; | |||
| EXPECT_TRUE(r.second); | |||
| EXPECT_EQ(it.key(), 100); | |||
| EXPECT_EQ(it.value(), "Hello World. I am 100"); | |||
| MS_LOG(INFO) << "Locate key " << 300 << " Expect not found."; | |||
| it = btree.Search(300); | |||
| EXPECT_TRUE(it == btree.end()); | |||
| auto q = btree.Search(300); | |||
| EXPECT_FALSE(q.second); | |||
| } | |||
| // Test duplicate key | |||
| @@ -169,26 +165,18 @@ TEST_F(MindDataTestBPlusTree, Test2) { | |||
| { | |||
| MS_LOG(INFO) << "Locating key from 0 to 9999. Expect found."; | |||
| for (int i = 0; i < 10000; i++) { | |||
| auto it = btree.Search(i); | |||
| bool eoS = (it == btree.end()); | |||
| EXPECT_FALSE(eoS); | |||
| if (!eoS) { | |||
| auto r = btree.Search(i); | |||
| EXPECT_TRUE(r.second); | |||
| if (r.second) { | |||
| auto &it = r.first; | |||
| EXPECT_EQ(it.key(), i); | |||
| std::string val = "Hello World. I am " + std::to_string(i); | |||
| EXPECT_EQ(it.value(), val); | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Locate key " << 10000 << ". Expect not found"; | |||
| auto it = btree.Search(10000); | |||
| EXPECT_TRUE(it == btree.end()); | |||
| } | |||
| // Test to retrieve key at certain position. | |||
| { | |||
| for (int i = 0; i < 10000; i++) { | |||
| int k = btree.KeyAtPos(i); | |||
| EXPECT_EQ(k, i); | |||
| } | |||
| auto q = btree.Search(10000); | |||
| EXPECT_FALSE(q.second); | |||
| } | |||
| } | |||
| @@ -204,7 +192,8 @@ TEST_F(MindDataTestBPlusTree, Test3) { | |||
| uint64_t max = ai.max_key(); | |||
| EXPECT_EQ(min, 1); | |||
| EXPECT_EQ(max, 4); | |||
| auto it = ai.Search(3); | |||
| auto r = ai.Search(3); | |||
| auto &it = r.first; | |||
| EXPECT_EQ(it.value(), "b"); | |||
| MS_LOG(INFO) << "Dump all the values using [] operator."; | |||
| for (uint64_t i = min; i <= max; i++) { | |||