You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

btree.h 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef DATASET_UTIL_INDEX_H_
  17. #define DATASET_UTIL_INDEX_H_
  #include <algorithm>
  #include <atomic>
  #include <cstddef>
  #include <cstdint>
  #include <deque>
  #include <functional>
  #include <iterator>
  #include <memory>
  #include <string>
  #include <utility>
  #include "./securec.h"
  #include "dataset/util/allocator.h"
  #include "dataset/util/list.h"
  #include "dataset/util/lock.h"
  #include "dataset/util/memory_pool.h"
  #include "dataset/util/services.h"
  #include "dataset/util/status.h"
  31. namespace mindspore {
  32. namespace dataset {
  // Default compile-time configuration for BPlusTree (used when no traits type is supplied).
  struct BPlusTreeTraits {
    // Integer type used to count and index the slots of a node; it therefore bounds the
    // number of keys a single node can hold.
    using slot_type = uint16_t;
    // Capacity, in slots, of every leaf node.
    static constexpr slot_type kLeafSlots{256};
    // Capacity, in slots, of every inner node.
    static constexpr slot_type kInnerSlots{128};
    // When true, a full node is split high instead of 50/50.
    static constexpr bool kAppendMode{false};
  };
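  // Implementation of a B+ tree.
  // @tparam K Key type (exposed as key_type).
  // @tparam V Value type (exposed as value_type).
  // @tparam C Key comparison functor (exposed as key_compare); defaults to std::less<K>.
  // @tparam T Traits type supplying slot_type and the per-node slot counts; defaults to BPlusTreeTraits.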
  49. template <typename K, typename V, typename C = std::less<K>, typename T = BPlusTreeTraits>
  50. class BPlusTree {
  51. public:
  // Result codes used internally by the tree routines. IndexRc2Status() turns them into a
  // Status for callers. The numeric values are spelled out in the trailing comments because
  // unknown codes are reported by number.
  enum class IndexRc : char {
    kOk = 0,        // 0
    kDuplicateKey,  // 1
    kSlotFull,      // 2
    kKeyNotFound,   // 3
    kNullPointer,   // 4
    kOutOfMemory,   // 5
    kRetry,         // 6
    kUnexpectedError = 127
  };
  // Evaluates the expression _s exactly once and, when the resulting IndexRc is anything other
  // than IndexRc::kOk, returns it from the enclosing function. The do/while(false) wrapper lets
  // the macro be used as a single statement. The temporary deliberately avoids a double
  // underscore in its name: such identifiers are reserved for the implementation.
  #define RETURN_IF_BAD_RC(_s)               \
    do {                                     \
      const IndexRc index_rc_tmp_ = (_s);    \
      if (index_rc_tmp_ != IndexRc::kOk) {   \
        return index_rc_tmp_;                \
      }                                      \
    } while (false)
  69. Status IndexRc2Status(IndexRc rc) {
  70. if (rc == IndexRc::kOk) {
  71. return Status(StatusCode::kOK);
  72. } else if (rc == IndexRc::kOutOfMemory) {
  73. return Status(StatusCode::kOutOfMemory);
  74. } else if (rc == IndexRc::kDuplicateKey) {
  75. return Status(StatusCode::kDuplicateKey);
  76. } else {
  77. RETURN_STATUS_UNEXPECTED(std::to_string(static_cast<int>(rc)));
  78. }
  79. }
  80. using key_type = K;
  81. using value_type = V;
  82. using key_compare = C;
  83. using slot_type = typename T::slot_type;
  84. using traits = T;
  85. using key_allocator = Allocator<key_type>;
  86. using value_allocator = Allocator<value_type>;
  87. using slot_allocator = Allocator<slot_type>;
  88. explicit BPlusTree(const value_allocator &alloc);
  89. ~BPlusTree() noexcept;
  90. BPlusTree(const BPlusTree &) = delete;
  91. BPlusTree(BPlusTree &&) = delete;
  92. BPlusTree &operator=(const BPlusTree &) = delete;
  93. BPlusTree &operator=(BPlusTree &&) = delete;
  94. key_compare key_comp() const { return key_less_; }
  95. size_t size() const { return stats_.size_; }
  96. bool empty() const { return (size() == 0); }
  97. // @param key
  98. // @param value
  99. // @return
  100. Status DoInsert(const key_type &key, const value_type &value);
  101. void PopulateNumKeys();
  102. key_type KeyAtPos(uint64_t inx);
  // Statistics kept for the tree. Every counter starts at zero and num_keys_array_valid_
  // starts false; size_ is the only member that is atomic.
  struct tree_stats {
    std::atomic<uint64_t> size_{0};
    uint32_t leaves_{0};
    uint32_t inner_nodes_{0};
    uint32_t level_{0};
    bool num_keys_array_valid_{false};
    tree_stats() = default;
  };
  112. private:
  113. // Abstract class of a node (leaf or inner)
  114. class BaseNode {
  115. public:
  116. friend class BPlusTree;
  117. virtual bool is_leafnode() const = 0;
  118. virtual bool is_full() const = 0;
  119. explicit BaseNode(const value_allocator &alloc) : alloc_(alloc) {}
  120. virtual ~BaseNode() = default;
  121. protected:
  122. RWLock rw_lock_;
  123. value_allocator alloc_;
  124. private:
  125. Node<BaseNode> lru_;
  126. };
  127. uint64_t PopulateNumKeys(BaseNode *n);
  128. key_type KeyAtPos(BaseNode *n, uint64_t inx);
  129. // This control block keeps track of all the nodes we traverse on insert.
  130. // To maximize concurrency, internal nodes are latched S. If a node split
  131. // is required, we must releases all the latches and redo it again and change
  132. // the latch mode from S to X.
  133. struct LockPathCB {
  134. enum class LockMode : char { kShared = 0, kExclusive = 1, kNone = 2 };
  135. struct path {
  136. BaseNode *node_;
  137. bool locked_;
  138. path() : node_(nullptr), locked_(false) {}
  139. path(BaseNode *p, LockMode lockmode) : node_(p), locked_(false) {
  140. if (lockmode == LockMode::kExclusive) {
  141. p->rw_lock_.LockExclusive();
  142. locked_ = true;
  143. } else if (lockmode == LockMode::kShared) {
  144. p->rw_lock_.LockShared();
  145. locked_ = true;
  146. }
  147. }
  148. };
  149. LockPathCB(BPlusTree *tree, bool retryWithXlock) : self_(tree), latch_shared_(true) {
  150. if (retryWithXlock) {
  151. latch_shared_ = false;
  152. }
  153. if (latch_shared_) {
  154. tree->rw_lock_.LockShared();
  155. } else {
  156. tree->rw_lock_.LockExclusive();
  157. }
  158. }
  159. ~LockPathCB() noexcept {
  160. // Make sure all locks are released.
  161. while (!paths_.empty()) {
  162. path p = paths_.back();
  163. paths_.pop_back();
  164. if (p.locked_) {
  165. p.node_->rw_lock_.Unlock();
  166. }
  167. }
  168. self_->rw_lock_.Unlock();
  169. self_ = nullptr;
  170. }
  171. void LockNode(BaseNode *p, LockMode locktype) { paths_.emplace_back(p, locktype); }
  172. void UnlockMyParents(BaseNode *me) {
  173. path p = paths_.front();
  174. while (p.node_ != me) {
  175. if (p.locked_) {
  176. p.node_->rw_lock_.Unlock();
  177. }
  178. paths_.pop_front();
  179. p = paths_.front();
  180. }
  181. }
  182. BPlusTree *self_;
  183. std::deque<path> paths_;
  184. bool latch_shared_;
  185. };
  186. // Definition of inner node which fans to either inner node or leaf node.
  187. class InnerNode : public BaseNode {
  188. public:
  189. friend class BPlusTree;
  190. using alloc_type = typename value_allocator::template rebind<InnerNode>::other;
  191. bool is_leafnode() const override { return false; }
  192. bool is_full() const override { return (slotuse_ == traits::kInnerSlots); }
  193. IndexRc Sort();
  194. // 50/50 split
  195. IndexRc Split(InnerNode *to, key_type *split_key);
  196. IndexRc InsertIntoSlot(slot_type slot, const key_type &key, BaseNode *ptr);
  197. explicit InnerNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  198. ~InnerNode() = default;
  199. slot_type slot_dir_[traits::kInnerSlots];
  200. key_type keys_[traits::kInnerSlots];
  201. BaseNode *data_[traits::kInnerSlots + 1] = {nullptr};
  202. uint64_t num_keys_[traits::kInnerSlots + 1] = {0};
  203. slot_type slotuse_;
  204. };
  205. // Definition of a leaf node which contains the key/value pair
  206. class LeafNode : public BaseNode {
  207. public:
  208. friend class BPlusTree;
  209. using alloc_type = typename value_allocator::template rebind<LeafNode>::other;
  210. Node<LeafNode> link_;
  211. bool is_leafnode() const override { return true; }
  212. bool is_full() const override { return (slotuse_ == traits::kLeafSlots); }
  213. IndexRc Sort();
  214. // 50/50 split
  215. IndexRc Split(LeafNode *to);
  216. IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::shared_ptr<value_type> value);
  217. explicit LeafNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  218. ~LeafNode() = default;
  219. slot_type slot_dir_[traits::kLeafSlots];
  220. key_type keys_[traits::kLeafSlots];
  221. std::shared_ptr<value_type> data_[traits::kLeafSlots];
  222. slot_type slotuse_;
  223. };
  224. RWLock rw_lock_;
  225. value_allocator alloc_;
  226. // All the leaf nodes. Used by the iterator to traverse all the key/values.
  227. List<LeafNode> leaf_nodes_;
  228. // All the nodes (inner + leaf). Used by the destructor to free the memory of all the nodes.
  229. List<BaseNode> all_;
  230. // Pointer to the root of the tree.
  231. BaseNode *root_;
  232. // Key comparison object
  233. key_compare key_less_;
  234. // Stat
  235. tree_stats stats_;
  236. bool LessThan(const key_type &a, const key_type &b) const { return key_less_(a, b); }
  237. bool EqualOrLessThan(const key_type &a, const key_type &b) const { return !key_less_(b, a); }
  238. bool Equal(const key_type &a, const key_type &b) const { return !key_less_(a, b) && !key_less_(b, a); }
  239. IndexRc AllocateInner(InnerNode **p);
  240. IndexRc AllocateLeaf(LeafNode **p);
  241. template <typename node_type>
  242. slot_type FindSlot(const node_type *node, const key_type &key, bool *duplicate = nullptr) const {
  243. slot_type lo = 0;
  244. while (lo < node->slotuse_ && key_comp()(node->keys_[node->slot_dir_[lo]], key)) {
  245. ++lo;
  246. }
  247. bool keymatch = (lo < node->slotuse_ && Equal(key, node->keys_[node->slot_dir_[lo]]));
  248. if (keymatch && !node->is_leafnode()) {
  249. // For an inner node and we match a key during search, we should look into the next slot.
  250. ++lo;
  251. }
  252. if (duplicate != nullptr) {
  253. *duplicate = keymatch;
  254. }
  255. return lo;
  256. }
  257. IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key, std::shared_ptr<value_type> value,
  258. key_type *split_key, LeafNode **split_node);
  259. IndexRc InnerInsertKeyChild(InnerNode *node, const key_type &key, BaseNode *ptr, key_type *split_key,
  260. InnerNode **split_node);
  261. inline BaseNode *FindBranch(InnerNode *inner, slot_type slot) const {
  262. BaseNode *child = nullptr;
  263. if (slot == 0) {
  264. child = inner->data_[0];
  265. } else {
  266. child = inner->data_[inner->slot_dir_[slot - 1] + 1];
  267. }
  268. return child;
  269. }
  270. IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::shared_ptr<value_type> value,
  271. key_type *split_key, BaseNode **split_node);
  272. IndexRc Locate(BaseNode *top, const key_type &key, LeafNode **ln, slot_type *s) const;
  273. public:
  274. class Iterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  275. public:
  276. using reference = BPlusTree::value_type &;
  277. using pointer = BPlusTree::value_type *;
  278. explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0) {}
  279. Iterator(LeafNode *leaf, slot_type slot) : cur_(leaf), slot_(slot) {}
  280. ~Iterator() = default;
  281. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  282. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  283. const key_type &key() { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  284. const value_type &value() { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  285. // Prefix++
  286. Iterator &operator++();
  287. // Postfix++
  288. Iterator operator++(int);
  289. // Prefix--
  290. Iterator &operator--();
  291. // Postfix--
  292. Iterator operator--(int);
  293. bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  294. bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  295. private:
  296. typename BPlusTree::LeafNode *cur_;
  297. slot_type slot_;
  298. };
  299. class ConstIterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  300. public:
  301. using reference = BPlusTree::value_type &;
  302. using pointer = BPlusTree::value_type *;
  303. explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0) {}
  304. ~ConstIterator() = default;
  305. ConstIterator(const LeafNode *leaf, slot_type slot) : cur_(leaf), slot_(slot) {}
  306. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  307. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  308. const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  309. const value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  310. // Prefix++
  311. ConstIterator &operator++();
  312. // Postfix++
  313. ConstIterator operator++(int);
  314. // Prefix--
  315. ConstIterator &operator--();
  316. // Postfix--
  317. ConstIterator operator--(int);
  318. bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  319. bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  320. private:
  321. const typename BPlusTree::LeafNode *cur_;
  322. slot_type slot_;
  323. };
  324. Iterator begin();
  325. Iterator end();
  326. ConstIterator begin() const;
  327. ConstIterator end() const;
  328. ConstIterator cbegin() const;
  329. ConstIterator cend() const;
  330. // Locate the entry with key
  331. ConstIterator Search(const key_type &key) const;
  332. value_type operator[](key_type key);
  333. };
  334. } // namespace dataset
  335. } // namespace mindspore
  336. #endif // DATASET_UTIL_INDEX_H_
  337. #include "btree_impl.tpp"
  338. #include "btree_iterator.tpp"