You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

btree.h 16 kB

5 years ago
5 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  18. #include <algorithm>
  19. #include <atomic>
  20. #include <functional>
  21. #include <utility>
  22. #include <memory>
  23. #include <deque>
  24. #include "./securec.h"
  25. #include "minddata/dataset/util/allocator.h"
  26. #include "minddata/dataset/util/list.h"
  27. #include "minddata/dataset/util/lock.h"
  28. #include "minddata/dataset/util/memory_pool.h"
  29. #include "minddata/dataset/util/services.h"
  30. #include "minddata/dataset/util/status.h"
  31. namespace mindspore {
  32. namespace dataset {
  /// Default compile-time parameters of a B+ tree; used as the default for the `T` template argument of BPlusTree.
  struct BPlusTreeTraits {
    /// Unsigned type used for slot numbers and per-node key counts, so it bounds how many keys one node can hold.
    using slot_type = uint16_t;
    /// Capacity, in key slots, of each leaf node.
    static constexpr slot_type kLeafSlots{256};
    /// Capacity, in key slots, of each inner node.
    static constexpr slot_type kInnerSlots{128};
  };
  42. /// Implementation of B+ tree
  43. /// @tparam K -- the type of key
  44. /// @tparam V -- the type of value
  45. /// @tparam A -- allocator
  46. /// @tparam C -- comparison class
  47. /// @tparam T -- trait
  48. template <typename K, typename V, typename A = std::allocator<V>, typename C = std::less<K>,
  49. typename T = BPlusTreeTraits>
  50. class BPlusTree {
  51. public:
  /// Result codes returned by the internal index routines (converted to Status by IndexRc2Status).
  /// Values 0..6 are consecutive; kUnexpectedError is pinned to 127.
  enum class IndexRc : char {
    kOk = 0,        // 0
    kDuplicateKey,  // 1
    kSlotFull,      // 2
    kKeyNotFound,   // 3
    kNullPointer,   // 4
    kOutOfMemory,   // 5
    kRetry,         // 6
    kUnexpectedError = 127
  };
  // Evaluate `_s` exactly once; if the resulting IndexRc is anything other than IndexRc::kOk,
  // return that code from the enclosing function, otherwise fall through.
  // The temporary deliberately avoids a double-underscore name: identifiers containing `__`
  // are reserved for the implementation, so the previous `__rc` was not a legal user identifier.
#define RETURN_IF_BAD_RC(_s)                       \
  do {                                             \
    const IndexRc index_rc_checked_ = (_s);        \
    if (index_rc_checked_ != IndexRc::kOk) {       \
      return index_rc_checked_;                    \
    }                                              \
  } while (false)
  69. Status IndexRc2Status(IndexRc rc) {
  70. if (rc == IndexRc::kOk) {
  71. return Status(StatusCode::kSuccess);
  72. } else if (rc == IndexRc::kOutOfMemory) {
  73. return Status(StatusCode::kMDOutOfMemory);
  74. } else if (rc == IndexRc::kDuplicateKey) {
  75. return Status(StatusCode::kMDDuplicateKey);
  76. } else {
  77. RETURN_STATUS_UNEXPECTED(std::to_string(static_cast<int>(rc)));
  78. }
  79. }
  80. using key_type = K;
  81. using value_type = V;
  82. using key_compare = C;
  83. using slot_type = typename T::slot_type;
  84. using traits = T;
  85. using value_allocator = A;
  86. using key_allocator = typename value_allocator::template rebind<key_type>::other;
  87. using slot_allocator = typename value_allocator::template rebind<slot_type>::other;
  88. BPlusTree();
  89. explicit BPlusTree(const Allocator<V> &alloc);
  90. ~BPlusTree() noexcept;
  91. BPlusTree(const BPlusTree &) = delete;
  92. BPlusTree(BPlusTree &&) = delete;
  93. BPlusTree &operator=(const BPlusTree &) = delete;
  94. BPlusTree &operator=(BPlusTree &&) = delete;
  95. key_compare key_comp() const { return key_less_; }
  96. size_t size() const { return stats_.size_; }
  97. bool empty() const { return (size() == 0); }
  /// Insert a key/value pair; this overload receives the value by const reference.
  98. /// @param key Key of the entry to insert.
  99. /// @param value Value to associate with `key`.
  100. /// @return Status of the insert. NOTE(review): defined outside this header -- presumably the internal IndexRc is mapped through IndexRc2Status (so a duplicate key would surface as kMDDuplicateKey); confirm in the implementation file.
  101. Status DoInsert(const key_type &key, const value_type &value);
  /// Overload of DoInsert that receives the value as an rvalue std::unique_ptr.
  102. Status DoInsert(const key_type &key, std::unique_ptr<value_type> &&value);
  103. // Replace the value stored for a given key with a new value. NOTE(review): the returned unique_ptr presumably carries the previous value -- confirm against the definition.
  104. std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value);
  /// Overload of DoUpdate that receives the new value as an rvalue std::unique_ptr.
  105. std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value);
  // Counters describing the tree; every counter starts at zero.
  struct tree_stats {
    std::atomic<uint64_t> size_{0};  // value reported by BPlusTree::size()
    uint32_t leaves_{0};             // value reported by GetNumLeaves()
    uint32_t inner_nodes_{0};        // value reported by GetNumInnerNodes()
    uint32_t level_{0};              // GetHeight() reports level_ + 1 for a non-empty tree
    tree_stats() {}
  };
  114. /// \brief Statistics functions
  115. /// \return Return the height of the tree
  116. auto GetHeight() const { return empty() ? 0 : stats_.level_ + 1; }
  117. /// \return Order of the B+ tree
  118. auto GetOrder() const { return traits::kLeafSlots; }
  119. /// \return Number of leaves nodes
  120. auto GetNumLeaves() const { return stats_.leaves_; }
  121. /// \return Number of inner nodes
  122. auto GetNumInnerNodes() const { return stats_.inner_nodes_; }
  123. /// \brief Toggle locking
  124. /// \note Once locking is off. It is user's responsibility to ensure concurrency
  125. void SetLocking(bool on_off) {
  126. UniqueLock lck(&rw_lock_);
  127. acquire_lock_ = on_off;
  128. }
  129. void LockShared() { rw_lock_.LockShared(); }
  130. void LockExclusive() { rw_lock_.LockExclusive(); }
  131. void Unlock() { rw_lock_.Unlock(); }
  132. private:
  133. // Abstract class of a node (leaf or inner)
  134. class BaseNode {
  135. public:
  136. friend class BPlusTree;
  137. virtual bool is_leafnode() const = 0;
  138. virtual bool is_full() const = 0;
  139. explicit BaseNode(const value_allocator &alloc) : alloc_(alloc) {}
  140. virtual ~BaseNode() = default;
  141. protected:
  142. mutable RWLock rw_lock_;
  143. value_allocator alloc_;
  144. private:
  145. Node<BaseNode> lru_;
  146. };
  147. // This control block keeps track of all the nodes we traverse on insert.
  148. // To maximize concurrency, internal nodes are latched S. If a node split
  149. // is required, we must releases all the latches and redo it again and change
  150. // the latch mode from S to X.
  151. struct LockPathCB {
  152. enum class LockMode : char { kShared = 0, kExclusive = 1, kNone = 2 };
  153. struct path {
  154. BaseNode *node_;
  155. bool locked_;
  156. path() : node_(nullptr), locked_(false) {}
  157. path(BaseNode *p, LockMode lockmode) : node_(p), locked_(false) {
  158. if (lockmode == LockMode::kExclusive) {
  159. p->rw_lock_.LockExclusive();
  160. locked_ = true;
  161. } else if (lockmode == LockMode::kShared) {
  162. p->rw_lock_.LockShared();
  163. locked_ = true;
  164. }
  165. }
  166. };
  167. LockPathCB(BPlusTree *tree, bool retryWithXlock) : self_(tree), latch_shared_(true) {
  168. if (retryWithXlock) {
  169. latch_shared_ = false;
  170. }
  171. if (latch_shared_) {
  172. tree->rw_lock_.LockShared();
  173. } else {
  174. tree->rw_lock_.LockExclusive();
  175. }
  176. }
  177. ~LockPathCB() noexcept {
  178. // Make sure all locks are released.
  179. while (!paths_.empty()) {
  180. path p = paths_.back();
  181. paths_.pop_back();
  182. if (p.locked_) {
  183. p.node_->rw_lock_.Unlock();
  184. }
  185. }
  186. self_->rw_lock_.Unlock();
  187. self_ = nullptr;
  188. }
  189. void LockNode(BaseNode *p, LockMode locktype) { paths_.emplace_back(p, locktype); }
  190. void UnlockMyParents(BaseNode *me) {
  191. path p = paths_.front();
  192. while (p.node_ != me) {
  193. if (p.locked_) {
  194. p.node_->rw_lock_.Unlock();
  195. }
  196. paths_.pop_front();
  197. p = paths_.front();
  198. }
  199. }
  200. BPlusTree *self_;
  201. std::deque<path> paths_;
  202. bool latch_shared_;
  203. };
  204. // Definition of inner node which fans to either inner node or leaf node.
  205. class InnerNode : public BaseNode {
  206. public:
  207. friend class BPlusTree;
  208. using alloc_type = typename value_allocator::template rebind<InnerNode>::other;
  209. bool is_leafnode() const override { return false; }
  210. bool is_full() const override { return (slotuse_ == traits::kInnerSlots); }
  211. IndexRc Sort();
  212. // 50/50 split
  213. IndexRc Split(InnerNode *to, key_type *split_key);
  214. IndexRc InsertIntoSlot(slot_type slot, const key_type &key, BaseNode *ptr);
  215. explicit InnerNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  216. ~InnerNode() = default;
  217. slot_type slot_dir_[traits::kInnerSlots] = {0};
  218. key_type keys_[traits::kInnerSlots] = {0};
  219. BaseNode *data_[traits::kInnerSlots + 1] = {nullptr};
  220. slot_type slotuse_;
  221. };
  222. // Definition of a leaf node which contains the key/value pair
  223. class LeafNode : public BaseNode {
  224. public:
  225. friend class BPlusTree;
  226. using alloc_type = typename value_allocator::template rebind<LeafNode>::other;
  227. Node<LeafNode> link_;
  228. bool is_leafnode() const override { return true; }
  229. bool is_full() const override { return (slotuse_ == traits::kLeafSlots); }
  230. IndexRc Sort();
  231. // 50/50 split
  232. IndexRc Split(LeafNode *to);
  233. IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::unique_ptr<value_type> &&value);
  234. explicit LeafNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
  235. ~LeafNode() = default;
  236. slot_type slot_dir_[traits::kLeafSlots] = {0};
  237. key_type keys_[traits::kLeafSlots] = {0};
  238. std::unique_ptr<value_type> data_[traits::kLeafSlots];
  239. slot_type slotuse_;
  240. };
  // Tree-level lock: taken by LockPathCB and by the public LockShared()/LockExclusive()/Unlock().
  241. mutable RWLock rw_lock_;
  // Allocator passed to the nodes the tree constructs (Init() uses it for the first leaf).
  242. value_allocator alloc_;
  243. // All the leaf nodes. Used by the iterator to traverse all the key/values.
  244. List<LeafNode> leaf_nodes_;
  245. // All the nodes (inner + leaf). Used by the destructor to free the memory of all the nodes.
  246. List<BaseNode> all_;
  247. // Pointer to the root of the tree.
  248. BaseNode *root_;
  249. // Key comparison object
  250. key_compare key_less_;
  251. // Statistics counters (size, number of leaves and inner nodes, level).
  252. tree_stats stats_;
  253. // Locking flag written by SetLocking(). NOTE(review): presumably read by the out-of-line code to decide whether locks are taken -- confirm.
  254. bool acquire_lock_;
  255. void Init() {
  256. typename LeafNode::alloc_type alloc(alloc_);
  257. LeafNode *p = nullptr;
  258. try {
  259. p = alloc.allocate(1);
  260. } catch (std::bad_alloc &e) {
  261. p = nullptr;
  262. return;
  263. }
  264. root_ = new (p) LeafNode(alloc_);
  265. all_.Prepend(p);
  266. leaf_nodes_.Append(p);
  267. stats_.leaves_++;
  268. }
  269. bool LessThan(const key_type &a, const key_type &b) const { return key_less_(a, b); }
  270. bool EqualOrLessThan(const key_type &a, const key_type &b) const { return !key_less_(b, a); }
  271. bool Equal(const key_type &a, const key_type &b) const { return !key_less_(a, b) && !key_less_(b, a); }
  // Node allocation helpers, defined outside this header section. NOTE(review): presumably they store the new node in *p and report allocation failure as IndexRc::kOutOfMemory -- confirm in the implementation file.
  272. IndexRc AllocateInner(InnerNode **p);
  273. IndexRc AllocateLeaf(LeafNode **p);
  274. template <typename node_type>
  275. slot_type FindSlot(const node_type *node, const key_type &key, bool *duplicate = nullptr) const {
  276. slot_type lo = 0;
  277. while (lo < node->slotuse_ && key_comp()(node->keys_[node->slot_dir_[lo]], key)) {
  278. ++lo;
  279. }
  280. bool keymatch = (lo < node->slotuse_ && Equal(key, node->keys_[node->slot_dir_[lo]]));
  281. if (keymatch && !node->is_leafnode()) {
  282. // For an inner node and we match a key during search, we should look into the next slot.
  283. ++lo;
  284. }
  285. if (duplicate != nullptr) {
  286. *duplicate = keymatch;
  287. }
  288. return lo;
  289. }
  // Insert helpers for a single leaf / inner node, defined outside this header section. NOTE(review): split_key and split_node look like out-parameters describing a node split -- confirm in the implementation file.
  290. IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key,
  291. std::unique_ptr<value_type> &&value, key_type *split_key, LeafNode **split_node);
  292. IndexRc InnerInsertKeyChild(InnerNode *node, const key_type &key, BaseNode *ptr, key_type *split_key,
  293. InnerNode **split_node);
  294. inline BaseNode *FindBranch(InnerNode *inner, slot_type slot) const {
  295. BaseNode *child = nullptr;
  296. if (slot == 0) {
  297. child = inner->data_[0];
  298. } else {
  299. child = inner->data_[inner->slot_dir_[slot - 1] + 1];
  300. }
  301. return child;
  302. }
  // Insert and lookup entry points working on a subtree, defined outside this header section. NOTE(review): Locate presumably returns the leaf in *ln and the slot in *s for `key` -- confirm in the implementation file.
  303. IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::unique_ptr<value_type> &&value,
  304. key_type *split_key, BaseNode **split_node);
  305. IndexRc Locate(RWLock *parent_lock, bool forUpdate, BaseNode *top, const key_type &key, LeafNode **ln,
  306. slot_type *s) const;
  307. public:
  308. class Iterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  309. public:
  310. using reference = BPlusTree::value_type &;
  311. using pointer = BPlusTree::value_type *;
  312. explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
  313. Iterator(LeafNode *leaf, slot_type slot, bool locked = false) : cur_(leaf), slot_(slot), locked_(locked) {}
  314. ~Iterator();
  315. explicit Iterator(const Iterator &);
  316. Iterator &operator=(const Iterator &lhs);
  317. explicit Iterator(Iterator &&) noexcept;
  318. Iterator &operator=(Iterator &&lhs);
  319. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  320. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  321. const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  322. value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  323. // Prefix++
  324. Iterator &operator++();
  325. // Postfix++
  326. Iterator operator++(int);
  327. // Prefix--
  328. Iterator &operator--();
  329. // Postfix--
  330. Iterator operator--(int);
  331. bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  332. bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  333. void LockShared() {
  334. cur_->rw_lock_.LockShared();
  335. locked_ = true;
  336. }
  337. void LockExclusive() {
  338. cur_->rw_lock_.LockExclusive();
  339. locked_ = true;
  340. }
  341. void Unlock() {
  342. cur_->rw_lock_.Unlock();
  343. locked_ = false;
  344. }
  345. private:
  346. typename BPlusTree::LeafNode *cur_;
  347. slot_type slot_;
  348. bool locked_;
  349. };
  350. class ConstIterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
  351. public:
  352. using reference = BPlusTree::value_type &;
  353. using pointer = BPlusTree::value_type *;
  354. explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
  355. ~ConstIterator();
  356. ConstIterator(const LeafNode *leaf, slot_type slot, bool locked = false)
  357. : cur_(leaf), slot_(slot), locked_(locked) {}
  358. explicit ConstIterator(const ConstIterator &);
  359. ConstIterator &operator=(const ConstIterator &lhs);
  360. explicit ConstIterator(ConstIterator &&) noexcept;
  361. ConstIterator &operator=(ConstIterator &&lhs);
  362. pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
  363. reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  364. const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
  365. value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
  366. // Prefix++
  367. ConstIterator &operator++();
  368. // Postfix++
  369. ConstIterator operator++(int);
  370. // Prefix--
  371. ConstIterator &operator--();
  372. // Postfix--
  373. ConstIterator operator--(int);
  374. bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
  375. bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
  376. void LockShared() {
  377. cur_->rw_lock_.LockShared();
  378. locked_ = true;
  379. }
  380. void LockExclusive() {
  381. cur_->rw_lock_.LockExclusive();
  382. locked_ = true;
  383. }
  384. void Unlock() {
  385. cur_->rw_lock_.Unlock();
  386. locked_ = false;
  387. }
  388. private:
  389. const typename BPlusTree::LeafNode *cur_;
  390. slot_type slot_;
  391. bool locked_;
  392. };
  // Iteration entry points, defined outside this header section.
  393. Iterator begin();
  394. Iterator end();
  395. ConstIterator begin() const;
  396. ConstIterator end() const;
  397. ConstIterator cbegin() const;
  398. ConstIterator cend() const;
  399. // Locate the entry with key. NOTE(review): the bool of the returned pair presumably tells whether the key was found -- confirm in the implementation file.
  400. std::pair<ConstIterator, bool> Search(const key_type &key) const;
  401. std::pair<Iterator, bool> Search(const key_type &key);
  // Lookup by key; the result is returned by value (a copy of value_type), not by reference.
  402. value_type operator[](key_type key);
  403. };
  404. } // namespace dataset
  405. } // namespace mindspore
  406. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
  407. #include "btree_impl.tpp"
  408. #include "btree_iterator.tpp"