You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

btree.h 15 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <deque>
#include <functional>
#include <iterator>
#include <memory>
#include <utility>
#include "./securec.h"
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/util/list.h"
#include "minddata/dataset/util/lock.h"
#include "minddata/dataset/util/memory_pool.h"
#include "minddata/dataset/util/services.h"
#include "minddata/dataset/util/status.h"
  31. namespace mindspore {
  32. namespace dataset {
  /// \brief Default sizing policy for a B+ tree.
  /// \note BPlusTree reads exactly these three members from its trait parameter T.
  struct BPlusTreeTraits {
    /// Integer type used for slot counts and slot indices; its range caps the number of keys in a node.
    using slot_type = uint16_t;
    /// Number of slots in each inner node of the tree.
    static constexpr slot_type kInnerSlots{128};
    /// Number of slots in each leaf of the tree.
    static constexpr slot_type kLeafSlots{256};
  };
  42. /// Implementation of B+ tree
  43. /// @tparam K -- the type of key
  44. /// @tparam V -- the type of value
  45. /// @tparam A -- allocator
  46. /// @tparam C -- comparison class
  47. /// @tparam T -- trait
  48. template <typename K, typename V, typename A = std::allocator<V>, typename C = std::less<K>,
  49. typename T = BPlusTreeTraits>
  50. class BPlusTree {
  51. public:
  /// \brief Result code used by the internal index routines; stored in a single char.
  /// \note IndexRc2Status gives kOk, kOutOfMemory and kDuplicateKey a dedicated StatusCode; every other code is
  ///       reported as an unexpected error.
  enum class IndexRc : char {
    // Success.
    kOk = 0,
    // Translated to StatusCode::kDuplicateKey.
    kDuplicateKey = 1,
    kSlotFull = 2,
    kKeyNotFound = 3,
    kNullPointer = 4,
    // Translated to StatusCode::kOutOfMemory.
    kOutOfMemory = 5,
    kRetry = 6,
    kUnexpectedError = 127
  };
  // Evaluate the expression _s exactly once; if the result is anything other than IndexRc::kOk, return it from
  // the enclosing function. The enclosing function must therefore return IndexRc.
  // The temporary deliberately avoids a double underscore: such identifiers are reserved for the implementation.
  // Note: being a macro, this is not scoped by the class and is not #undef'd in this header.
#define RETURN_IF_BAD_RC(_s)                          \
  do {                                                \
    const IndexRc return_if_bad_rc_result_ = (_s);    \
    if (return_if_bad_rc_result_ != IndexRc::kOk) {   \
      return return_if_bad_rc_result_;                \
    }                                                 \
  } while (false)
  69. Status IndexRc2Status(IndexRc rc) {
  70. if (rc == IndexRc::kOk) {
  71. return Status(StatusCode::kOK);
  72. } else if (rc == IndexRc::kOutOfMemory) {
  73. return Status(StatusCode::kOutOfMemory);
  74. } else if (rc == IndexRc::kDuplicateKey) {
  75. return Status(StatusCode::kDuplicateKey);
  76. } else {
  77. RETURN_STATUS_UNEXPECTED(std::to_string(static_cast<int>(rc)));
  78. }
  79. }
  80. using key_type = K;
  81. using value_type = V;
  82. using key_compare = C;
  83. using slot_type = typename T::slot_type;
  84. using traits = T;
  85. using value_allocator = A;
  86. using key_allocator = typename value_allocator::template rebind<key_type>::other;
  87. using slot_allocator = typename value_allocator::template rebind<slot_type>::other;
  88. BPlusTree();
  89. explicit BPlusTree(const Allocator<V> &alloc);
  90. ~BPlusTree() noexcept;
  91. BPlusTree(const BPlusTree &) = delete;
  92. BPlusTree(BPlusTree &&) = delete;
  93. BPlusTree &operator=(const BPlusTree &) = delete;
  94. BPlusTree &operator=(BPlusTree &&) = delete;
  95. key_compare key_comp() const { return key_less_; }
  96. size_t size() const { return stats_.size_; }
  97. bool empty() const { return (size() == 0); }
  /// \brief Insert a key/value pair into the tree (declared here, defined outside this class body).
  /// @param key Key of the new entry
  /// @param value Value of the new entry; one overload takes it by const reference, the other takes an rvalue
  ///        std::unique_ptr to it
  /// @return Status of the insert. NOTE(review): presumably the internal IndexRc translated by IndexRc2Status
  ///         (e.g. a duplicate-key status for an existing key) -- confirm against the implementation.
  Status DoInsert(const key_type &key, const value_type &value);
  Status DoInsert(const key_type &key, std::unique_ptr<value_type> &&value);
  // Update a new value for a given key.
  // NOTE(review): the returned std::unique_ptr presumably holds the value that was replaced -- confirm against
  // the implementation.
  std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value);
  std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value);
  /// \brief Counters describing the tree. Every counter starts at zero.
  struct tree_stats {
    // The only atomic counter of the four.
    std::atomic<uint64_t> size_;
    uint32_t leaves_;
    uint32_t inner_nodes_;
    uint32_t level_;

    tree_stats() : size_{0}, leaves_{0}, inner_nodes_{0}, level_{0} {}
  };
  114. /// \brief Statistics functions
  115. /// \return Return the height of the tree
  116. auto GetHeight() const { return empty() ? 0 : stats_.level_ + 1; }
  117. /// \return Order of the B+ tree
  118. auto GetOrder() const { return traits::kLeafSlots; }
  119. /// \return Number of leaves nodes
  120. auto GetNumLeaves() const { return stats_.leaves_; }
  121. /// \return Number of inner nodes
  122. auto GetNumInnerNodes() const { return stats_.inner_nodes_; }
  123. /// \brief Toggle locking
  124. /// \note Once locking is off. It is user's responsibility to ensure concurrency
  125. void SetLocking(bool on_off) {
  126. UniqueLock lck(&rw_lock_);
  127. acquire_lock_ = on_off;
  128. }
  129. private:
  130. // Abstract class of a node (leaf or inner)
  131. class BaseNode {
  132. public:
  133. friend class BPlusTree;
  134. virtual bool is_leafnode() const = 0;
  135. virtual bool is_full() const = 0;
  136. explicit BaseNode(const value_allocator &alloc) : alloc_(alloc) {}
  137. virtual ~BaseNode() = default;
  138. protected:
  139. mutable RWLock rw_lock_;
  140. value_allocator alloc_;
  141. private:
  142. Node<BaseNode> lru_;
  143. };
  144. // This control block keeps track of all the nodes we traverse on insert.
  145. // To maximize concurrency, internal nodes are latched S. If a node split
  146. // is required, we must releases all the latches and redo it again and change
  147. // the latch mode from S to X.
  148. struct LockPathCB {
  149. enum class LockMode : char { kShared = 0, kExclusive = 1, kNone = 2 };
  150. struct path {
  151. BaseNode *node_;
  152. bool locked_;
  153. path() : node_(nullptr), locked_(false) {}
  154. path(BaseNode *p, LockMode lockmode) : node_(p), locked_(false) {
  155. if (lockmode == LockMode::kExclusive) {
  156. p->rw_lock_.LockExclusive();
  157. locked_ = true;
  158. } else if (lockmode == LockMode::kShared) {
  159. p->rw_lock_.LockShared();
  160. locked_ = true;
  161. }
  162. }
  163. };
  164. LockPathCB(BPlusTree *tree, bool retryWithXlock) : self_(tree), latch_shared_(true) {
  165. if (retryWithXlock) {
  166. latch_shared_ = false;
  167. }
  168. if (latch_shared_) {
  169. tree->rw_lock_.LockShared();
  170. } else {
  171. tree->rw_lock_.LockExclusive();
  172. }
  173. }
  174. ~LockPathCB() noexcept {
  175. // Make sure all locks are released.
  176. while (!paths_.empty()) {
  177. path p = paths_.back();
  178. paths_.pop_back();
  179. if (p.locked_) {
  180. p.node_->rw_lock_.Unlock();
  181. }
  182. }
  183. self_->rw_lock_.Unlock();
  184. self_ = nullptr;
  185. }
  186. void LockNode(BaseNode *p, LockMode locktype) { paths_.emplace_back(p, locktype); }
  187. void UnlockMyParents(BaseNode *me) {
  188. path p = paths_.front();
  189. while (p.node_ != me) {
  190. if (p.locked_) {
  191. p.node_->rw_lock_.Unlock();
  192. }
  193. paths_.pop_front();
  194. p = paths_.front();
  195. }
  196. }
  197. BPlusTree *self_;
  198. std::deque<path> paths_;
  199. bool latch_shared_;
  200. };
  201. // Definition of inner node which fans to either inner node or leaf node.
  202. class InnerNode : public BaseNode {
  203. public:
  204. friend class BPlusTree;
  205. using alloc_type = typename value_allocator::template rebind<InnerNode>::other;
  206. bool is_leafnode() const override { return false; }
  207. bool is_full() const override { return (slotuse_ == traits::kInnerSlots); }
  208. IndexRc Sort();
  209. // 50/50 split
  210. IndexRc Split(InnerNode *to, key_type *split_key);
  211. IndexRc InsertIntoSlot(slot_type slot, const key_type &key, BaseNode *ptr);
  212. explicit InnerNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  213. ~InnerNode() = default;
  214. slot_type slot_dir_[traits::kInnerSlots] = {0};
  215. key_type keys_[traits::kInnerSlots] = {0};
  216. BaseNode *data_[traits::kInnerSlots + 1] = {nullptr};
  217. slot_type slotuse_;
  218. };
  219. // Definition of a leaf node which contains the key/value pair
  220. class LeafNode : public BaseNode {
  221. public:
  222. friend class BPlusTree;
  223. using alloc_type = typename value_allocator::template rebind<LeafNode>::other;
  224. Node<LeafNode> link_;
  225. bool is_leafnode() const override { return true; }
  226. bool is_full() const override { return (slotuse_ == traits::kLeafSlots); }
  227. IndexRc Sort();
  228. // 50/50 split
  229. IndexRc Split(LeafNode *to);
  230. IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::unique_ptr<value_type> &&value);
  231. explicit LeafNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  232. ~LeafNode() = default;
  233. slot_type slot_dir_[traits::kLeafSlots] = {0};
  234. key_type keys_[traits::kLeafSlots] = {0};
  235. std::unique_ptr<value_type> data_[traits::kLeafSlots];
  236. slot_type slotuse_;
  237. };
  // Tree-level lock; taken by SetLocking and, shared or exclusive, by LockPathCB.
  mutable RWLock rw_lock_;
  // Allocator handed to every node; Init rebinds it to the leaf node type to allocate the first leaf.
  value_allocator alloc_;
  // All the leaf nodes. Used by the iterator to traverse all the key/values.
  List<LeafNode> leaf_nodes_;
  // All the nodes (inner + leaf). Used by the destructor to free the memory of all the nodes.
  List<BaseNode> all_;
  // Pointer to the root of the tree.
  BaseNode *root_;
  // Key comparison object
  key_compare key_less_;
  // Stat
  tree_stats stats_;
  // lock mode; written by SetLocking (see its note about turning locking off).
  bool acquire_lock_;
  252. void Init() {
  253. typename LeafNode::alloc_type alloc(alloc_);
  254. auto *p = alloc.allocate(1);
  255. root_ = new (p) LeafNode(alloc_);
  256. all_.Prepend(p);
  257. leaf_nodes_.Append(p);
  258. stats_.leaves_++;
  259. }
  260. bool LessThan(const key_type &a, const key_type &b) const { return key_less_(a, b); }
  261. bool EqualOrLessThan(const key_type &a, const key_type &b) const { return !key_less_(b, a); }
  262. bool Equal(const key_type &a, const key_type &b) const { return !key_less_(a, b) && !key_less_(b, a); }
  263. IndexRc AllocateInner(InnerNode **p);
  264. IndexRc AllocateLeaf(LeafNode **p);
  265. template <typename node_type>
  266. slot_type FindSlot(const node_type *node, const key_type &key, bool *duplicate = nullptr) const {
  267. slot_type lo = 0;
  268. while (lo < node->slotuse_ && key_comp()(node->keys_[node->slot_dir_[lo]], key)) {
  269. ++lo;
  270. }
  271. bool keymatch = (lo < node->slotuse_ && Equal(key, node->keys_[node->slot_dir_[lo]]));
  272. if (keymatch && !node->is_leafnode()) {
  273. // For an inner node and we match a key during search, we should look into the next slot.
  274. ++lo;
  275. }
  276. if (duplicate != nullptr) {
  277. *duplicate = keymatch;
  278. }
  279. return lo;
  280. }
  // Insert helpers for one leaf node and one inner node (declared here, defined outside this class body).
  // NOTE(review): split_key and split_node look like out-parameters describing a node split performed by the
  // insert -- confirm against the implementation.
  IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key,
                             std::unique_ptr<value_type> &&value, key_type *split_key, LeafNode **split_node);
  IndexRc InnerInsertKeyChild(InnerNode *node, const key_type &key, BaseNode *ptr, key_type *split_key,
                              InnerNode **split_node);
  285. inline BaseNode *FindBranch(InnerNode *inner, slot_type slot) const {
  286. BaseNode *child = nullptr;
  287. if (slot == 0) {
  288. child = inner->data_[0];
  289. } else {
  290. child = inner->data_[inner->slot_dir_[slot - 1] + 1];
  291. }
  292. return child;
  293. }
  // Insert starting at node n (declared here, defined outside this class body).
  // NOTE(review): split_key and split_node look like out-parameters describing a node split -- confirm against
  // the implementation.
  IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::unique_ptr<value_type> &&value,
                         key_type *split_key, BaseNode **split_node);
  // Look up key starting at node top; the leaf and slot are handed back through ln and s.
  // NOTE(review): parent_lock and forUpdate presumably control how locks are handed over during the descent --
  // confirm against the implementation.
  IndexRc Locate(RWLock *parent_lock, bool forUpdate, BaseNode *top, const key_type &key, LeafNode **ln,
                 slot_type *s) const;
  298. public:
  299. class Iterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  300. public:
  301. using reference = BPlusTree::value_type &;
  302. using pointer = BPlusTree::value_type *;
  303. explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
  304. Iterator(LeafNode *leaf, slot_type slot, bool locked = false) : cur_(leaf), slot_(slot), locked_(locked) {}
  305. ~Iterator();
  306. Iterator(const Iterator &);
  307. Iterator &operator=(const Iterator &lhs);
  308. Iterator(Iterator &&) noexcept;
  309. Iterator &operator=(Iterator &&lhs);
  310. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  311. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  312. const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  313. value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  314. // Prefix++
  315. Iterator &operator++();
  316. // Postfix++
  317. Iterator operator++(int);
  318. // Prefix--
  319. Iterator &operator--();
  320. // Postfix--
  321. Iterator operator--(int);
  322. bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  323. bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  324. private:
  325. typename BPlusTree::LeafNode *cur_;
  326. slot_type slot_;
  327. bool locked_;
  328. };
  329. class ConstIterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  330. public:
  331. using reference = BPlusTree::value_type &;
  332. using pointer = BPlusTree::value_type *;
  333. explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
  334. ~ConstIterator();
  335. ConstIterator(const LeafNode *leaf, slot_type slot, bool locked = false)
  336. : cur_(leaf), slot_(slot), locked_(locked) {}
  337. ConstIterator(const ConstIterator &);
  338. ConstIterator &operator=(const ConstIterator &lhs);
  339. ConstIterator(ConstIterator &&) noexcept;
  340. ConstIterator &operator=(ConstIterator &&lhs);
  341. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  342. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  343. const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  344. value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  345. // Prefix++
  346. ConstIterator &operator++();
  347. // Postfix++
  348. ConstIterator operator++(int);
  349. // Prefix--
  350. ConstIterator &operator--();
  351. // Postfix--
  352. ConstIterator operator--(int);
  353. bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  354. bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  355. private:
  356. const typename BPlusTree::LeafNode *cur_;
  357. slot_type slot_;
  358. bool locked_;
  359. };
  // Iteration entry points (declared here, defined outside this class body).
  Iterator begin();
  Iterator end();
  ConstIterator begin() const;
  ConstIterator end() const;
  ConstIterator cbegin() const;
  ConstIterator cend() const;
  // Locate the entry with key
  // NOTE(review): the bool of the returned pair presumably tells whether the key was found, with the iterator
  // positioned on the entry -- confirm against the implementation.
  std::pair<ConstIterator, bool> Search(const key_type &key) const;
  std::pair<Iterator, bool> Search(const key_type &key);
  // Look up key and return the value by value (a copy, not a reference into the tree).
  // NOTE(review): behavior for a missing key is not visible here -- confirm against the implementation.
  value_type operator[](key_type key);
  370. };
  371. } // namespace dataset
  372. } // namespace mindspore
  373. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  374. #include "btree_impl.tpp"
  375. #include "btree_iterator.tpp"