You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

btree.h 14 kB

6 years ago
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  #include <algorithm>
  #include <atomic>
  #include <cstddef>
  #include <cstdint>
  #include <deque>
  #include <functional>
  #include <iterator>
  #include <memory>
  #include <string>
  #include <utility>
  #include "./securec.h"
  #include "minddata/dataset/util/allocator.h"
  #include "minddata/dataset/util/list.h"
  #include "minddata/dataset/util/lock.h"
  #include "minddata/dataset/util/memory_pool.h"
  #include "minddata/dataset/util/services.h"
  #include "minddata/dataset/util/status.h"
  31. namespace mindspore {
  32. namespace dataset {
  /// Default compile-time configuration used by BPlusTree when no custom trait is supplied.
  struct BPlusTreeTraits {
    /// Integer type used for slot indices and per-node key counts.
    /// Its range bounds how many keys a single node may hold.
    using slot_type = uint16_t;
    /// Capacity (in slots) of every inner node of the tree.
    static constexpr slot_type kInnerSlots{128};
    /// Capacity (in slots) of every leaf node of the tree.
    static constexpr slot_type kLeafSlots{256};
  };
  42. /// Implementation of B+ tree
  43. /// @tparam K -- the type of key
  44. /// @tparam V -- the type of value
  45. /// @tparam A -- allocator
  46. /// @tparam C -- comparison class
  47. /// @tparam T -- trait
  48. template <typename K, typename V, typename A = std::allocator<V>, typename C = std::less<K>,
  49. typename T = BPlusTreeTraits>
  50. class BPlusTree {
  51. public:
  52. enum class IndexRc : char {
  53. kOk = 0,
  54. kDuplicateKey = 1,
  55. kSlotFull = 2,
  56. kKeyNotFound = 3,
  57. kNullPointer = 4,
  58. kOutOfMemory = 5,
  59. kRetry = 6,
  60. kUnexpectedError = 127
  61. };
  62. #define RETURN_IF_BAD_RC(_s) \
  63. do { \
  64. IndexRc __rc = (_s); \
  65. if (__rc != IndexRc::kOk) { \
  66. return __rc; \
  67. } \
  68. } while (false)
  69. Status IndexRc2Status(IndexRc rc) {
  70. if (rc == IndexRc::kOk) {
  71. return Status(StatusCode::kOK);
  72. } else if (rc == IndexRc::kOutOfMemory) {
  73. return Status(StatusCode::kOutOfMemory);
  74. } else if (rc == IndexRc::kDuplicateKey) {
  75. return Status(StatusCode::kDuplicateKey);
  76. } else {
  77. RETURN_STATUS_UNEXPECTED(std::to_string(static_cast<int>(rc)));
  78. }
  79. }
  80. using key_type = K;
  81. using value_type = V;
  82. using key_compare = C;
  83. using slot_type = typename T::slot_type;
  84. using traits = T;
  85. using value_allocator = A;
  86. using key_allocator = typename value_allocator::template rebind<key_type>::other;
  87. using slot_allocator = typename value_allocator::template rebind<slot_type>::other;
  88. BPlusTree();
  89. explicit BPlusTree(const Allocator<V> &alloc);
  90. ~BPlusTree() noexcept;
  91. BPlusTree(const BPlusTree &) = delete;
  92. BPlusTree(BPlusTree &&) = delete;
  93. BPlusTree &operator=(const BPlusTree &) = delete;
  94. BPlusTree &operator=(BPlusTree &&) = delete;
  95. key_compare key_comp() const { return key_less_; }
  96. size_t size() const { return stats_.size_; }
  97. bool empty() const { return (size() == 0); }
  98. /// @param key
  99. /// @param value
  100. /// @return
  101. Status DoInsert(const key_type &key, const value_type &value);
  102. Status DoInsert(const key_type &key, std::unique_ptr<value_type> &&value);
  103. // Update a new value for a given key.
  104. std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value);
  105. std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value);
  106. // Statistics
  107. struct tree_stats {
  108. std::atomic<uint64_t> size_;
  109. uint32_t leaves_;
  110. uint32_t inner_nodes_;
  111. uint32_t level_;
  112. tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0) {}
  113. };
  114. private:
  115. // Abstract class of a node (leaf or inner)
  116. class BaseNode {
  117. public:
  118. friend class BPlusTree;
  119. virtual bool is_leafnode() const = 0;
  120. virtual bool is_full() const = 0;
  121. explicit BaseNode(const value_allocator &alloc) : alloc_(alloc) {}
  122. virtual ~BaseNode() = default;
  123. protected:
  124. mutable RWLock rw_lock_;
  125. value_allocator alloc_;
  126. private:
  127. Node<BaseNode> lru_;
  128. };
  129. // This control block keeps track of all the nodes we traverse on insert.
  130. // To maximize concurrency, internal nodes are latched S. If a node split
  131. // is required, we must releases all the latches and redo it again and change
  132. // the latch mode from S to X.
  133. struct LockPathCB {
  134. enum class LockMode : char { kShared = 0, kExclusive = 1, kNone = 2 };
  135. struct path {
  136. BaseNode *node_;
  137. bool locked_;
  138. path() : node_(nullptr), locked_(false) {}
  139. path(BaseNode *p, LockMode lockmode) : node_(p), locked_(false) {
  140. if (lockmode == LockMode::kExclusive) {
  141. p->rw_lock_.LockExclusive();
  142. locked_ = true;
  143. } else if (lockmode == LockMode::kShared) {
  144. p->rw_lock_.LockShared();
  145. locked_ = true;
  146. }
  147. }
  148. };
  149. LockPathCB(BPlusTree *tree, bool retryWithXlock) : self_(tree), latch_shared_(true) {
  150. if (retryWithXlock) {
  151. latch_shared_ = false;
  152. }
  153. if (latch_shared_) {
  154. tree->rw_lock_.LockShared();
  155. } else {
  156. tree->rw_lock_.LockExclusive();
  157. }
  158. }
  159. ~LockPathCB() noexcept {
  160. // Make sure all locks are released.
  161. while (!paths_.empty()) {
  162. path p = paths_.back();
  163. paths_.pop_back();
  164. if (p.locked_) {
  165. p.node_->rw_lock_.Unlock();
  166. }
  167. }
  168. self_->rw_lock_.Unlock();
  169. self_ = nullptr;
  170. }
  171. void LockNode(BaseNode *p, LockMode locktype) { paths_.emplace_back(p, locktype); }
  172. void UnlockMyParents(BaseNode *me) {
  173. path p = paths_.front();
  174. while (p.node_ != me) {
  175. if (p.locked_) {
  176. p.node_->rw_lock_.Unlock();
  177. }
  178. paths_.pop_front();
  179. p = paths_.front();
  180. }
  181. }
  182. BPlusTree *self_;
  183. std::deque<path> paths_;
  184. bool latch_shared_;
  185. };
  186. // Definition of inner node which fans to either inner node or leaf node.
  187. class InnerNode : public BaseNode {
  188. public:
  189. friend class BPlusTree;
  190. using alloc_type = typename value_allocator::template rebind<InnerNode>::other;
  191. bool is_leafnode() const override { return false; }
  192. bool is_full() const override { return (slotuse_ == traits::kInnerSlots); }
  193. IndexRc Sort();
  194. // 50/50 split
  195. IndexRc Split(InnerNode *to, key_type *split_key);
  196. IndexRc InsertIntoSlot(slot_type slot, const key_type &key, BaseNode *ptr);
  197. explicit InnerNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  198. ~InnerNode() = default;
  199. slot_type slot_dir_[traits::kInnerSlots] = {0};
  200. key_type keys_[traits::kInnerSlots] = {0};
  201. BaseNode *data_[traits::kInnerSlots + 1] = {nullptr};
  202. slot_type slotuse_;
  203. };
  204. // Definition of a leaf node which contains the key/value pair
  205. class LeafNode : public BaseNode {
  206. public:
  207. friend class BPlusTree;
  208. using alloc_type = typename value_allocator::template rebind<LeafNode>::other;
  209. Node<LeafNode> link_;
  210. bool is_leafnode() const override { return true; }
  211. bool is_full() const override { return (slotuse_ == traits::kLeafSlots); }
  212. IndexRc Sort();
  213. // 50/50 split
  214. IndexRc Split(LeafNode *to);
  215. IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::unique_ptr<value_type> &&value);
  216. explicit LeafNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  217. ~LeafNode() = default;
  218. slot_type slot_dir_[traits::kLeafSlots] = {0};
  219. key_type keys_[traits::kLeafSlots] = {0};
  220. std::unique_ptr<value_type> data_[traits::kLeafSlots];
  221. slot_type slotuse_;
  222. };
  223. mutable RWLock rw_lock_;
  224. value_allocator alloc_;
  225. // All the leaf nodes. Used by the iterator to traverse all the key/values.
  226. List<LeafNode> leaf_nodes_;
  227. // All the nodes (inner + leaf). Used by the destructor to free the memory of all the nodes.
  228. List<BaseNode> all_;
  229. // Pointer to the root of the tree.
  230. BaseNode *root_;
  231. // Key comparison object
  232. key_compare key_less_;
  233. // Stat
  234. tree_stats stats_;
  235. bool LessThan(const key_type &a, const key_type &b) const { return key_less_(a, b); }
  236. bool EqualOrLessThan(const key_type &a, const key_type &b) const { return !key_less_(b, a); }
  237. bool Equal(const key_type &a, const key_type &b) const { return !key_less_(a, b) && !key_less_(b, a); }
  238. IndexRc AllocateInner(InnerNode **p);
  239. IndexRc AllocateLeaf(LeafNode **p);
  240. template <typename node_type>
  241. slot_type FindSlot(const node_type *node, const key_type &key, bool *duplicate = nullptr) const {
  242. slot_type lo = 0;
  243. while (lo < node->slotuse_ && key_comp()(node->keys_[node->slot_dir_[lo]], key)) {
  244. ++lo;
  245. }
  246. bool keymatch = (lo < node->slotuse_ && Equal(key, node->keys_[node->slot_dir_[lo]]));
  247. if (keymatch && !node->is_leafnode()) {
  248. // For an inner node and we match a key during search, we should look into the next slot.
  249. ++lo;
  250. }
  251. if (duplicate != nullptr) {
  252. *duplicate = keymatch;
  253. }
  254. return lo;
  255. }
  256. IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key,
  257. std::unique_ptr<value_type> &&value, key_type *split_key, LeafNode **split_node);
  258. IndexRc InnerInsertKeyChild(InnerNode *node, const key_type &key, BaseNode *ptr, key_type *split_key,
  259. InnerNode **split_node);
  260. inline BaseNode *FindBranch(InnerNode *inner, slot_type slot) const {
  261. BaseNode *child = nullptr;
  262. if (slot == 0) {
  263. child = inner->data_[0];
  264. } else {
  265. child = inner->data_[inner->slot_dir_[slot - 1] + 1];
  266. }
  267. return child;
  268. }
  269. IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::unique_ptr<value_type> &&value,
  270. key_type *split_key, BaseNode **split_node);
  271. IndexRc Locate(RWLock *parent_lock, bool forUpdate, BaseNode *top, const key_type &key, LeafNode **ln,
  272. slot_type *s) const;
  273. public:
  274. class Iterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  275. public:
  276. using reference = BPlusTree::value_type &;
  277. using pointer = BPlusTree::value_type *;
  278. explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
  279. Iterator(LeafNode *leaf, slot_type slot, bool locked = false) : cur_(leaf), slot_(slot), locked_(locked) {}
  280. ~Iterator();
  281. explicit Iterator(const Iterator &);
  282. Iterator &operator=(const Iterator &lhs);
  283. Iterator(Iterator &&);
  284. Iterator &operator=(Iterator &&lhs);
  285. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  286. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  287. const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  288. value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  289. // Prefix++
  290. Iterator &operator++();
  291. // Postfix++
  292. Iterator operator++(int);
  293. // Prefix--
  294. Iterator &operator--();
  295. // Postfix--
  296. Iterator operator--(int);
  297. bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  298. bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  299. private:
  300. typename BPlusTree::LeafNode *cur_;
  301. slot_type slot_;
  302. bool locked_;
  303. };
  304. class ConstIterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  305. public:
  306. using reference = BPlusTree::value_type &;
  307. using pointer = BPlusTree::value_type *;
  308. explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
  309. ~ConstIterator();
  310. ConstIterator(const LeafNode *leaf, slot_type slot, bool locked = false)
  311. : cur_(leaf), slot_(slot), locked_(locked) {}
  312. explicit ConstIterator(const ConstIterator &);
  313. ConstIterator &operator=(const ConstIterator &lhs);
  314. ConstIterator(ConstIterator &&);
  315. ConstIterator &operator=(ConstIterator &&lhs);
  316. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  317. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  318. const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  319. value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  320. // Prefix++
  321. ConstIterator &operator++();
  322. // Postfix++
  323. ConstIterator operator++(int);
  324. // Prefix--
  325. ConstIterator &operator--();
  326. // Postfix--
  327. ConstIterator operator--(int);
  328. bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  329. bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  330. private:
  331. const typename BPlusTree::LeafNode *cur_;
  332. slot_type slot_;
  333. bool locked_;
  334. };
  335. Iterator begin();
  336. Iterator end();
  337. ConstIterator begin() const;
  338. ConstIterator end() const;
  339. ConstIterator cbegin() const;
  340. ConstIterator cend() const;
  341. // Locate the entry with key
  342. std::pair<ConstIterator, bool> Search(const key_type &key) const;
  343. std::pair<Iterator, bool> Search(const key_type &key);
  344. value_type operator[](key_type key);
  345. };
  346. } // namespace dataset
  347. } // namespace mindspore
  348. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  349. #include "btree_impl.tpp"
  350. #include "btree_iterator.tpp"