Zero  0.1.0
btree_page_h.h
Go to the documentation of this file.
1 /*
2  * (c) Copyright 2011-2014, Hewlett-Packard Development Company, LP
3  */
4 
5 #ifndef __BTREE_PAGE_H_H
6 #define __BTREE_PAGE_H_H
7 
8 #include "btree_page.h"
9 
10 #include "w_defines.h"
11 #include "fixable_page_h.h"
12 #include "w_key.h"
13 #include "w_endian.h"
14 #include "w_base.h"
15 
17 
18 struct btree_lf_stats_t;
19 struct btree_int_stats_t;
20 
21 /*
22 * When this page belongs to a foster chain, we need to store
23 * high-fence of right-most sibling in every sibling to do
24 * batch-verification with bitmaps. @see
25 * btree_impl::_ux_verify_volume()
26 */
27 
28 class btree_page_h;
29 
37 class btrec_t {
38 public:
39  btrec_t() {};
40 
41  btrec_t(const btree_page_h& page, slotid_t slot);
42 
43  ~btrec_t() {};
44 
46  btrec_t& set(const btree_page_h& page, slotid_t slot);
47 
48  smsize_t elen() const {
49  return _elem.size();
50  }
51 
52  const w_keystr_t& key() const {
53  return _key;
54  }
55 
56  const cvec_t& elem() const {
57  return _elem;
58  }
59 
61  PageID child() const {
62  return _child;
63  }
64 
65  const lsn_t& child_emlsn() const {
66  return _child_emlsn;
67  }
68 
69  bool is_ghost_record() const {
70  return _ghost_record;
71  }
72 
73 private:
74  friend class btree_page_h;
75 
77 
79 
80  PageID _child; // opaque pointer
86 
88 
89  // disabled
90  btrec_t(const btrec_t&);
91 
92  btrec_t& operator=(const btrec_t&);
93 };
94 
96  set(page, slot);
97 }
98 
99 class btree_impl;
100 
101 template<class T> class btree_ghost_t;
104 
185 class btree_page_h : public fixable_page_h {
186  friend class btree_impl;
187 
188  template<class T> friend class btree_ghost_t;
189  friend class btree_ghost_mark_log;
191 
192  template<class T> friend class page_img_format_t;
193  friend class btree_split_log;
194 
195  btree_page* page() const {
196  return reinterpret_cast<btree_page*>(_pp);
197  }
198 
199  enum {
202  };
203 
204  // Used for page rebalance log record generation purpose
206  char c[sizeof(int16_t)];// int16 = 2 bytes, enough for max. record size in current implementation
207  int16_t is_ghost : 1, // 1 = ghost, 0 = non-ghost
208  len : 15; // record length
209  };
210 
211  // Used for page rebalance log record generation purpose
213  char c[sizeof(PageID)];// int32 = 4 bytes
215  };
216 
217 public:
218  // ======================================================================
219  // BEGIN: Struct/Enum/Constructor
220  // ======================================================================
221 
223 
225 
227 
230  w_assert1(_pp->tag == t_btree_p);
231  return *this;
232  }
233 
234 
235  // ======================================================================
236  // BEGIN: Header Get/Set functions
237  // ======================================================================
238 
239  PageID btree_root() const {
240  return page()->btree_root;
241  }
242 
243  smsize_t used_space() const;
244 
245  // Total usable space on page
246  smsize_t usable_space() const;
247 
249  int level() const;
250 
252  PageID pid0() const;
253 
255  PageID pid0_opaqueptr() const;
256 
258  PageID root() const;
259 
261  bool is_leaf() const;
262 
264  bool is_node() const;
265 
271  bool is_leaf_parent() const;
272 
274  PageID get_foster() const;
275 
277  PageID get_foster_opaqueptr() const;
278 
280  const char* get_prefix_key() const;
281 
283  int16_t get_prefix_length() const;
284 
286  const char* get_fence_low_key() const;
287 
289  int16_t get_fence_low_length() const;
290 
292  void copy_fence_low_key(w_keystr_t& buffer) const {
293  buffer.construct_from_keystr(get_fence_low_key(), get_fence_low_length());
294  }
295 
297  bool is_fence_low_infimum() const {
298  return get_fence_low_key()[0] == SIGN_NEGINF;
299  }
300 
307  const char* get_fence_high_key_noprefix() const;
308 
310  int16_t get_fence_high_length() const;
311 
314  return get_fence_high_length() - get_prefix_length();
315  }
316 
318  void copy_fence_high_key(w_keystr_t& buffer) const {
319  buffer.construct_from_keystr(get_prefix_key(), get_prefix_length(),
320  get_fence_high_key_noprefix(),
321  get_fence_high_length_noprefix());
322  }
323 
325  bool is_fence_high_supremum() const {
326  return get_prefix_length() == 0 && get_fence_high_key_noprefix()[0] == SIGN_POSINF;
327  }
328 
330  const char* get_chain_fence_high_key() const;
331 
333  int16_t get_chain_fence_high_length() const;
334 
336  void copy_chain_fence_high_key(w_keystr_t& buffer) const {
337  buffer.construct_from_keystr(get_chain_fence_high_key(), get_chain_fence_high_length());
338  }
339 
344  bool fence_contains(const w_keystr_t& key) const;
345 
351  int compare_with_fence_low(const w_keystr_t& key) const;
352 
354  int compare_with_fence_low(const char* key, size_t key_len) const;
355 
357  int compare_with_fence_low_noprefix(const char* key, size_t key_len) const;
358 
364  int compare_with_fence_high(const w_keystr_t& key) const;
365 
367  int compare_with_fence_high(const char* key, size_t key_len) const;
368 
370  int compare_with_fence_high_noprefix(const char* key, size_t key_len) const;
371 
377  int compare_with_chain_fence_high(const w_keystr_t& key) const;
378 
380  int compare_with_chain_fence_high(const char* key, size_t key_len) const;
381  // no 'noprefix' version because chain_fence_high might not share the prefix!
382 
392  void accept_empty_child(lsn_t new_lsn, PageID new_page_id, const bool f_redo);
393 
401  rc_t format_foster_child(btree_page_h& parent, const PageID& new_page_id,
402  const w_keystr_t& triggering_key,
403  w_keystr_t& split_key, // OUT: actual split key
404  int& move_count);
405 
414  bool set_foster_child(PageID foster_child_pid,
415  const w_keystr_t& new_fence_high, const w_keystr_t& child_fence_chain);
416 
417  void delete_range(int from, int to);
418 
440  rc_t format_steal(lsn_t new_lsn,
441  const PageID& pid,
442  StoreID store,
443  PageID root,
444  int level,
445  PageID pid0,
446  lsn_t pid0_emlsn,
447  PageID foster,
448  lsn_t foster_emlsn,
449  const w_keystr_t& fence_low,
450  const w_keystr_t& fence_high,
451  const w_keystr_t& chain_fence_high,
452  bool log_it = true,
453  btree_page_h* steal_src1 = nullptr,
454  int steal_from1 = 0,
455  int steal_to1 = 0,
456  btree_page_h* steal_src2 = nullptr,
457  int steal_from2 = 0,
458  int steal_to2 = 0,
459  bool steal_src2_pid0 = false,
460  const bool full_logging = false, // True if doing full logging for record movement
461  const bool log_src_1 = false, // Use only if full_logging = true
462  // True if log movements from src1
463  // False if log movements from src2
464  const bool ghost = false // When _init the page, should the fence key record be a ghost
465  );
466 
468  void _steal_records(btree_page_h* steal_src,
469  int steal_from,
470  int steal_to,
471  const bool full_logging);
472 
473  // Set the node fence keys, no change in data and other page information except record count (if move out)
474  // A special function used by Single-Page-Recovery REDO operation for page rebalance and page merge
475  // when full logging is on (no minimal logging), set the page fence key before the
476  // fully logged actual record movements
477  rc_t init_fence_keys(const bool set_low, const w_keystr_t& low,
478  const bool set_high, const w_keystr_t& high,
479  const bool set_chain, const w_keystr_t& chain_fence_high,
480  const bool set_pid0, const PageID new_pid0,
481  const bool set_emlsn, const lsn_t new_pid0_emlsn,
482  const bool set_foster, const PageID foster_pid0,
483  const bool set_foster_emlsn, const lsn_t foster_emlsn,
484  const int remove_count = 0);
485 
490  rc_t norecord_split(PageID foster, lsn_t foster_emlsn,
491  const w_keystr_t& fence_high,
492  const w_keystr_t& chain_fence_high);
493 
495  bool check_chance_for_norecord_split(const w_keystr_t& key_to_insert) const;
496 
497 
498  // ======================================================================
499  // BEGIN: Public record access functions
500  // ======================================================================
501 
503  int nrecs() const;
504 
506  int nghosts() const;
507 
509  bool is_ghost(slotid_t slot) const;
510 
512  void get_key(slotid_t slot, w_keystr_t& key) const;
513 
520  const char* element(int slot, smsize_t& len, bool& ghost) const;
521 
532  bool copy_element(int slot, char* out_buffer, smsize_t& len, bool& ghost) const;
533 
535  PageID child(slotid_t slot) const;
536 
538  PageID child_opaqueptr(slotid_t slot) const;
539 
556  PageID* page_pointer_address(int offset);
557 
563  size_t get_rec_space(int slot) const;
564 
565  // Determine the prefix length for a page rebalance operation
566  int16_t calculate_prefix_length(int16_t existing_prefix_len, // Existing prefix length
567  w_keystr_t low_key, // New low fence
568  w_keystr_t high_key); // New high fence
569 
570  // Copy the specified number of records into provided buffer (no ghost)
571  // each record must be reconstructed back to the original format (key+data)
572  // Assumption - this function is for page split and page merge purpose,
573  // therefore copy the last 'rec_count' records in the page, in page merge case
574  // 'rec_count' == nrecs()
575  rc_t copy_records(const int rec_count, // In: number of records to copy
576  char* data_buffer, // Out: caller provided data buffer
577  smsize_t& len); // In: buffer size
578  // out: data length
579 
580  // Function to insert records into a page, this is used for page rebalance purpose only
581  // while the incoming buffer contains all the record data plus whatever extra information
582  // per record
583  rc_t insert_records_dest_redo(const int16_t prefix_len, // In: source page prefix length
584  const int32_t move_count, //In: how many records to insert
585  const int16_t record_data_len, // In: total length of data with extra info
586  const char* record_data); // In: Data buffer
587 
588 
589  // ======================================================================
590  // BEGIN: Search functions
591  // ======================================================================
592 
600  void search(const w_keystr_t& key,
601  bool& found_key,
602  slotid_t& return_slot) const {
603  search((const char*)key.buffer_as_keystr(), key.get_length_as_keystr(), found_key, return_slot);
604  }
605 
613  void search(const char* key_raw, size_t key_raw_len,
614  bool& found_key, slotid_t& return_slot) const;
615 
633  void search_node(const w_keystr_t& key,
634  slotid_t& return_slot) const;
635 
636 
637  // ======================================================================
638  // BEGIN: Insert/Update/Delete functions
639  // ======================================================================
640 
647  rc_t insert_node(const w_keystr_t& key,
648  slotid_t slot,
649  PageID child,
650  const lsn_t& child_emlsn);
651 
659  void mark_ghost(slotid_t slot);
660 
667  void unmark_ghost(slotid_t slot);
668 
675  rc_t replace_ghost(const w_keystr_t& key, const cvec_t& elem, bool redo = false);
676 
685  rc_t replace_fence_rec_nolog_may_defrag(const w_keystr_t& low,
686  const w_keystr_t& high, const w_keystr_t& chain,
687  int new_prefix_length = -1);
688 
690  rc_t replace_fence_rec_nolog_no_defrag(const w_keystr_t& low,
691  const w_keystr_t& high, const w_keystr_t& chain, int new_prefix_length);
692 
697  rc_t remove_shift_nolog(slotid_t slot);
698 
704  rc_t replace_el_nolog(slotid_t slot, const cvec_t& elem);
705 
712  void overwrite_el_nolog(slotid_t slot, smsize_t offset,
713  const char* new_el, smsize_t elen);
714 
721  void reserve_ghost(const w_keystr_t& key, size_t element_length) {
722  reserve_ghost((const char*)key.buffer_as_keystr(), key.get_length_as_keystr(), element_length);
723  }
724 
725  // to make it slightly faster. not a neat kind of optimization
726  void reserve_ghost(const char* key_raw, size_t key_raw_len, size_t element_length);
727 
740  void insert_nonghost(const w_keystr_t& key, const cvec_t& elem);
741 
746  bool _is_enough_spacious_ghost(
747  const w_keystr_t& key, slotid_t slot,
748  const cvec_t& el);
749 
755  bool check_space_for_insert_leaf(const w_keystr_t& trunc_key, const cvec_t& el);
756 
757  bool check_space_for_insert_leaf(size_t trunc_key_length, size_t element_length);
758 
760  bool check_space_for_insert_node(const w_keystr_t& key);
761 
774  void suggest_fence_for_split(
775  w_keystr_t& mid, slotid_t& right_begins_from, const w_keystr_t& triggering_key) const;
776 
778  w_keystr_t recalculate_fence_for_split(slotid_t right_begins_from) const;
779 
780  bool is_insertion_extremely_skewed_right() const;
781 
782  bool is_insertion_skewed_right() const;
783 
784  bool is_insertion_skewed_left() const;
785 
786 
787  // ======================================================================
788  // BEGIN: Statistics/Debug etc functions
789  // ======================================================================
790 
801  rc_t defrag(const bool full_logging_redo = false);
802 
807  rc_t compress(const w_keystr_t& low, const w_keystr_t& high,
808  const w_keystr_t& chain, bool redo = false);
809 
811  rc_t leaf_stats(btree_lf_stats_t& btree_lf);
812 
814  rc_t int_stats(btree_int_stats_t& btree_int);
815 
817  void print(bool print_elem = false);
818 
831  bool is_consistent(bool check_keyorder = false, bool check_space = false) const;
832 
834  lsn_t* emlsn_address(general_recordid_t pos);
835 
837  const lsn_t& get_emlsn_general(general_recordid_t pos) const;
838 
846  void set_emlsn_general(general_recordid_t pos, const lsn_t& lsn);
847 
849  const lsn_t& get_foster_emlsn() const;
850 
852  const lsn_t& get_pid0_emlsn() const;
853 
854  /*
855  * The combined sizes of the key (i.e., the number of actual data
856  * bytes it contains) and value must be less than or equal to \ref
857  * max_entry_size, which is a function of the page size, and is
858  * such that two entries of this size fit on a page along with all
859  * the page and entry metadata. See sm_config_info_t and
860  * ss_m::config_info.
861  */
863 
864 private:
865  // ======================================================================
866  // BEGIN: Private record data packers
867  // ======================================================================
868 
870  typedef uint16_t key_length_t;
871 
872  enum _internal {
874  max_key_length = (1 << (sizeof(key_length_t) * 8 - 1)) - 1,
875  };
876 
888  void _pack_node_record(cvec_t& out, const cvec_t& trunc_key, const lsn_t& emlsn) const;
889 
891  typedef key_length_t pack_scratch_t;
892 
905  void _pack_leaf_record(cvec_t& out, pack_scratch_t& out_scratch,
906  const cvec_t& trunc_key,
907  const char* element, size_t element_len) const;
908 
924  void _pack_leaf_record_prefix(cvec_t& out, pack_scratch_t& out_scratch,
925  const cvec_t& trunc_key) const;
926 
944  int _pack_fence_rec(cvec_t& out, const w_keystr_t& low,
945  const w_keystr_t& high,
946  const w_keystr_t& chain,
947  int new_prefix_len) const;
948 
953  size_t _predict_leaf_data_length(int trunc_key_length, int element_length) const;
954 
961  typedef uint16_t poor_man_key;
962 
964  poor_man_key _extract_poor_man_key(const void* trunc_key, size_t trunc_key_len) const;
965 
967  poor_man_key _extract_poor_man_key(const cvec_t& trunc_key) const;
968 
970  poor_man_key _extract_poor_man_key(const void* key_with_prefix,
971  size_t key_len_with_prefix, size_t prefix_len) const;
972 
973 
974  // ======================================================================
975  // BEGIN: Private record accessors
976  // ======================================================================
977 
979  poor_man_key _poor(int slot) const;
980 
986  const char* _leaf_key_noprefix(slotid_t slot, size_t& len) const;
987 
993  const char* _node_key_noprefix(slotid_t slot, size_t& len) const;
994 
1002  size_t _element_offset(int slot) const;
1003 
1005  int _compare_key_noprefix(slotid_t slot, const void* key_noprefix, size_t key_len) const;
1006 
1009  int _compare_slot_with_key(int slot, const void* key_noprefix, size_t key_len, poor_man_key poor) const;
1010 
1011  // ======================================================================
1012  // BEGIN: Miscellaneous private members
1013  // ======================================================================
1014 
1016  bool _is_consistent_keyorder() const;
1017 
1019  bool _is_consistent_poormankey() const;
1020 
1022  void _update_btree_consecutive_skewed_insertions(slotid_t slot);
1023 
1029  bool _check_space_for_insert(size_t data_length);
1030 
1034  void _init(lsn_t lsn, PageID page_id, StoreID store,
1035  PageID root_pid, PageID pid0, lsn_t pid0_emlsn,
1036  PageID foster_pid, lsn_t foster_emlsn, int16_t btree_level,
1037  const w_keystr_t& low, const w_keystr_t& high,
1038  const w_keystr_t& chain_fence_high, const bool ghost);
1039 
1040 public:
1041  friend std::ostream& operator<<(std::ostream& os, btree_page_h& b);
1042 };
1043 
1056 
1057 public:
1059  btree_page_h(),
1060  _source(source) {
1061  w_assert1(_source->_pp != nullptr);
1062  _pp = _source->get_generic_page();
1063  _bufferpool_managed = _source->_bufferpool_managed;
1064  _mode = _source->_mode;
1065  _Q_ticket = _source->_Q_ticket;
1066  _source->_mode = LATCH_NL;
1067  }
1068 
1070  w_assert1(_source->_pp == _pp);
1071  w_assert1(_source->_mode == LATCH_NL);
1072  _source->_mode = _mode;
1073  _source->_Q_ticket = _Q_ticket;
1074 
1075  // prevent superclass destructor from trying to unfix us in the bufferpool:
1076  _bufferpool_managed = false;
1077  _mode = LATCH_NL;
1078  }
1079 };
1080 
1083  base_stat_t hdr_bs; /* page header (overhead) */
1084  base_stat_t key_bs; /* space used for keys */
1085  base_stat_t data_bs; /* space for data associated to keys */
1086  base_stat_t entry_overhead_bs; /* e.g., slot header, padding for alignment */
1088 
1090 
1091  base_stat_t unique_cnt; /* number of unique entries */
1092 
1094  clear();
1095  }
1096 
1097  void add(const btree_lf_stats_t& stats);
1098 
1099  void clear();
1100 
1101  w_rc_t audit() const;
1102 
1103  base_stat_t total_bytes() const;
1104 
1105  void print(ostream&, const char*) const;/* pretty print */
1106  friend ostream& operator<<(ostream&, const btree_lf_stats_t& s);
1107 };
1108 
1112 
1114 
1116  clear();
1117  }
1118 
1119  void add(const btree_int_stats_t& stats);
1120 
1121  void clear();
1122 
1123  w_rc_t audit() const;
1124 
1125  base_stat_t total_bytes() const;
1126 
1127  void print(ostream&, const char*) const;/* pretty print */
1128  friend ostream& operator<<(ostream&, const btree_int_stats_t& s);
1129 };
1130 
1131 // returns by bulk load and
1132 // btree_m::get_du_statistics
1134  btree_lf_stats_t leaf_pg; // byte counts for leaf pages
1135  btree_int_stats_t int_pg; // byte counts for interior pages
1136 
1137  base_stat_t leaf_pg_cnt; // level-1 pages found by tree traversal
1138  base_stat_t int_pg_cnt; // level>1 pages found by tree traversal
1139  base_stat_t unlink_pg_cnt; // pages allocated but not accounted-for
1140  // in a tree traversal; allocated count is
1141  // found by first_page/next_page traversal
1142  // of the store's extents
1143  // Given the way this is computed, it would be
1144  // pretty hard to fail an audit.
1145  // It would be nice if unlinked pages could be
1146  // accounted-for in another way for a meaningful audit.
1147  /* unlinked pages are empty and
1148  // will be freed the next
1149  // time they are encountered
1150  // during a traversal
1151  */
1152  base_stat_t unalloc_pg_cnt; // pages not-allocated by extent-traversal
1153  base_stat_t level_cnt; /* number of levels in btree */
1154 
1156  clear();
1157  }
1158 
1159  void add(const btree_stats_t& stats);
1160 
1161  void clear();
1162 
1163  w_rc_t audit() const;
1164 
1165  base_stat_t total_bytes() const;
1166 
1167  base_stat_t alloc_pg_cnt() const;
1168 
1169  void print(ostream&, const char*) const;/* pretty print */
1170  friend ostream& operator<<(ostream&, const btree_stats_t& s);
1171 };
1172 
1173 // ======================================================================
1174 // BEGIN: Inline function implementations
1175 // ======================================================================
1176 
1177 inline PageID btree_page_h::root() const {
1178  return page()->btree_root;
1179 }
1180 
1181 inline int btree_page_h::level() const {
1182  return page()->btree_level;
1183 }
1184 
1186  return page()->btree_pid0;
1187 }
1188 
1189 inline bool btree_page_h::is_leaf() const {
1190  return level() == 1;
1191 }
1192 
1193 inline bool btree_page_h::is_leaf_parent() const {
1194  return level() == 2;
1195 }
1196 
1197 inline bool btree_page_h::is_node() const {
1198  return !is_leaf();
1199 }
1200 
1202  return page()->btree_foster;
1203 }
1204 
1205 inline int16_t btree_page_h::get_prefix_length() const {
1206  return page()->btree_prefix_length;
1207 }
1208 
1209 inline int16_t btree_page_h::get_fence_low_length() const {
1210  return page()->btree_fence_low_length;
1211 }
1212 
1213 inline int16_t btree_page_h::get_fence_high_length() const {
1214  return page()->btree_fence_high_length;
1215 }
1216 
1218  return page()->btree_chain_fence_high_length;
1219 }
1220 
1221 inline const char* btree_page_h::get_fence_low_key() const {
1222  return page()->item_data(0);
1223 }
1224 
1225 inline const char* btree_page_h::get_fence_high_key_noprefix() const {
1226  return page()->item_data(0) + get_fence_low_length();
1227 }
1228 
1229 inline const char* btree_page_h::get_chain_fence_high_key() const {
1230  return page()->item_data(0) + get_fence_low_length() + get_fence_high_length_noprefix();
1231 }
1232 
1233 inline const char* btree_page_h::get_prefix_key() const {
1234  return get_fence_low_key(); // same thing. only the length differs.
1235 }
1236 
1237 inline int btree_page_h::nrecs() const {
1238  return page()->number_of_items() - 1;
1239 }
1240 
1241 inline int btree_page_h::nghosts() const {
1242  return page()->number_of_ghosts();
1243 }
1244 
1246  return key.compare_keystr(get_fence_low_key(), get_fence_low_length());
1247 }
1248 
1249 inline int btree_page_h::compare_with_fence_low(const char* key, size_t key_len) const {
1250  return w_keystr_t::compare_bin_str(key, key_len, get_fence_low_key(), get_fence_low_length());
1251 }
1252 
1253 inline int btree_page_h::compare_with_fence_low_noprefix(const char* key, size_t key_len) const {
1254  return w_keystr_t::compare_bin_str(key, key_len, get_fence_low_key() + get_prefix_length(),
1255  get_fence_low_length() - get_prefix_length());
1256 }
1257 
1259  return compare_with_fence_high((const char*)key.buffer_as_keystr(), key.get_length_as_keystr());
1260 }
1261 
1262 inline int btree_page_h::compare_with_fence_high(const char* key, size_t key_len) const {
1263  size_t prefix_len = get_prefix_length();
1264  if (prefix_len > key_len) {
1265  return w_keystr_t::compare_bin_str(key, key_len, get_prefix_key(), key_len);
1266  } else {
1267  // first, compare with prefix part
1268  int ret = w_keystr_t::compare_bin_str(key, prefix_len, get_prefix_key(), prefix_len);
1269  if (ret != 0) {
1270  return ret;
1271  }
1272  // then, compare with suffix part
1273  return w_keystr_t::compare_bin_str(key + prefix_len, key_len - prefix_len,
1274  get_fence_high_key_noprefix(), get_fence_high_length_noprefix());
1275  }
1276 }
1277 
1278 inline int btree_page_h::compare_with_fence_high_noprefix(const char* key, size_t key_len) const {
1279  return w_keystr_t::compare_bin_str(key, key_len, get_fence_high_key_noprefix(), get_fence_high_length_noprefix());
1280 }
1281 
1283  return key.compare_keystr(get_chain_fence_high_key(), get_chain_fence_high_length());
1284 }
1285 
1286 inline int btree_page_h::compare_with_chain_fence_high(const char* key, size_t key_len) const {
1287  return w_keystr_t::compare_bin_str(key, key_len, get_chain_fence_high_key(), get_chain_fence_high_length());
1288 }
1289 
1290 inline bool btree_page_h::fence_contains(const w_keystr_t& key) const {
1291  // fence-low is inclusive
1292  if (compare_with_fence_low(key) < 0) {
1293  return false;
1294  }
1295  // fence-high is exclusive (but if it's supremum, we allow it to handle to-the-end scan)
1296  if (!is_fence_high_supremum() && compare_with_fence_high(key) >= 0) {
1297  return false;
1298  }
1299  return true;
1300 }
1301 
1303  // this means completely pre-sorted insertion like bulk loading.
1304  int ins = page()->btree_consecutive_skewed_insertions;
1305  return ins > 50
1306  || ins > nrecs() * 9 / 10
1307  || (ins > 1 && ins >= nrecs() - 1);
1308 }
1309 
1311  return page()->btree_consecutive_skewed_insertions > 5;
1312 }
1313 
1315  return page()->btree_consecutive_skewed_insertions < -5;
1316 }
1317 
1319  w_assert1(is_node());
1320  w_assert1(slot >= 0);
1321  w_assert1(slot < nrecs());
1322  return page()->item_child(slot + 1);
1323 }
1324 
1326  if (offset == -2) {
1327  return &page()->btree_foster;
1328  }
1329 
1330  w_assert1(!is_leaf());
1331  w_assert1(-2 < offset && offset < nrecs());
1332 
1333  if (offset == -1) {
1334  return &page()->btree_pid0;
1335  }
1336 
1337  // CS TODO: why +1 here? what happens when offset == 0?
1338  return &page()->item_child(offset + 1);
1339 }
1340 
1341 inline size_t btree_page_h::get_rec_space(int slot) const {
1342  w_assert1(slot >= 0);
1343  return page()->item_space(slot + 1);
1344 }
1345 
1346 inline bool btree_page_h::is_ghost(slotid_t slot) const {
1347  return page()->is_ghost(slot + 1);
1348 }
1349 
1350 inline smsize_t
1352  return data_sz - page()->usable_space();
1353 }
1354 
1355 inline smsize_t
1357  return page()->usable_space();
1358 }
1359 
1360 
1361 // ======================================================================
1362 // BEGIN: Private record data packers inline implementation
1363 // ======================================================================
1364 
1365 inline void btree_page_h::_pack_node_record(cvec_t& out, const cvec_t& trunc_key,
1366  const lsn_t& emlsn) const {
1367  w_assert1(!is_leaf());
1368  out.put(trunc_key);
1369  out.put(&emlsn, sizeof(lsn_t));
1370 }
1371 
1373  const cvec_t& trunc_key) const {
1374  w_assert1(is_leaf());
1375  out_scratch = trunc_key.size();
1376  out.put(&out_scratch, sizeof(out_scratch));
1377  out.put(trunc_key);
1378 }
1379 
1381  const cvec_t& trunc_key,
1382  const char* element, size_t element_len) const {
1383  _pack_leaf_record_prefix(out, out_scratch, trunc_key);
1384  out.put(element, element_len);
1385 }
1386 
1388  const w_keystr_t& high,
1389  const w_keystr_t& chain,
1390  int new_prefix_len) const {
1391  int prefix_len;
1392  if (new_prefix_len >= 0) {
1393  w_assert1(low.common_leading_bytes(high) >= (size_t)new_prefix_len);
1394  prefix_len = new_prefix_len;
1395  } else {
1396  prefix_len = low.common_leading_bytes(high);
1397  }
1398 
1399  out.put(low);
1400  // eliminate prefix part from high:
1401  out.put((const char*)high.buffer_as_keystr() + prefix_len,
1402  high.get_length_as_keystr() - prefix_len);
1403  if (!chain.is_constructed()) {
1404  // If chain-high is not set, we don't need to put it here. However,
1405  // we will at least need fench-high length of capacity when we split this page.
1406  // As running out of space even for splitting is a severe issue, we pre-allocate
1407  // a fence-high length of capacity here.
1408  out.put(high);
1409  } else {
1410  out.put(chain);
1411  if (chain.get_length_as_keystr() < high.get_length_as_keystr()) {
1412  // for the same purpose, put additional bytes
1413  out.put(high.buffer_as_keystr(),
1414  high.get_length_as_keystr() - chain.get_length_as_keystr());
1415  }
1416  }
1417  return prefix_len;
1418 }
1419 
1420 inline size_t btree_page_h::_predict_leaf_data_length(int trunc_key_length,
1421  int element_length) const {
1422  return sizeof(key_length_t) + trunc_key_length + element_length;
1423 }
1424 
1426  size_t trunc_key_len) const {
1427  if (trunc_key_len == 0) {
1428  return 0;
1429  } else if (trunc_key_len == 1) {
1430  return (*reinterpret_cast<const unsigned char*>(trunc_key)) << 8;
1431  } else {
1432  // convert big-endian array (usable with memcmp) into 16-bit integer (little-endian)
1433  return deserialize16_ho(trunc_key);
1434  }
1435 }
1436 
1438  char start[2];
1439  trunc_key.copy_to(start, 2);
1440  return _extract_poor_man_key(start, trunc_key.size());
1441 }
1442 
1444 btree_page_h::_extract_poor_man_key(const void* key_with_prefix, size_t key_len_with_prefix,
1445  size_t prefix_len) const {
1446  w_assert1(prefix_len <= key_len_with_prefix);
1447  return _extract_poor_man_key(((const char*)key_with_prefix) + prefix_len, key_len_with_prefix - prefix_len);
1448 }
1449 
1450 
1451 // ======================================================================
1452 // BEGIN: Private [robust] record accessors inline implementation
1453 // ======================================================================
1454 
1456  w_assert1(slot >= 0);
1457  return page()->item_poor(slot + 1);
1458 }
1459 
1460 inline const char* btree_page_h::_leaf_key_noprefix(slotid_t slot, size_t& len) const {
1461  w_assert1(is_leaf());
1462  w_assert1(slot >= 0);
1463 
1464  key_length_t* data = (key_length_t*)page()->item_data(slot + 1);
1465  len = *data++;
1466  return (const char*)data;
1467 }
1468 
1469 inline const char* btree_page_h::_node_key_noprefix(slotid_t slot, size_t& len) const {
1470  w_assert1(is_node());
1471  w_assert1(slot >= 0);
1472 
1473  len = page()->item_length(slot + 1) - sizeof(lsn_t);
1474  return page()->item_data(slot + 1);
1475 }
1476 
1477 inline size_t btree_page_h::_element_offset(int slot) const {
1478  w_assert1(is_leaf());
1479  w_assert1(slot >= 0);
1480 
1481  size_t key_noprefix_length;
1482  (void)_leaf_key_noprefix(slot, key_noprefix_length);
1483 
1484  return key_noprefix_length + sizeof(key_length_t);
1485 }
1486 
1487 inline int btree_page_h::_compare_key_noprefix(slotid_t slot, const void* key_noprefix,
1488  size_t key_len) const {
1489  size_t curkey_len;
1490  const char* curkey;
1491  if (is_leaf()) {
1492  curkey = _leaf_key_noprefix(slot, curkey_len);
1493  } else {
1494  curkey = _node_key_noprefix(slot, curkey_len);
1495  }
1496 
1497  return w_keystr_t::compare_bin_str(curkey, curkey_len, key_noprefix, key_len);
1498 }
1499 
1500 // ======================================================================
1501 // BEGIN: Single-Page-Recovery related EMLSN accessors implementation
1502 // ======================================================================
1503 
1505  if (pos == GeneralRecordIds::PID0) {
1506  return &page()->btree_pid0_emlsn;
1507  } else if (pos == GeneralRecordIds::FOSTER_CHILD) {
1508  // this means foster child
1509  return &page()->btree_foster_emlsn;
1510  } else {
1511  // last 8 bytes after usual item_data is the child EMLSN.
1512  // Note: these are "pos", not "pos+1" because it's general_recordid_t, not slotid_t
1513  char* data = page()->item_data(pos);
1514  size_t len = page()->item_length(pos);
1515  w_assert1(len >= sizeof(lsn_t));
1516  len -= sizeof(lsn_t);
1517  return reinterpret_cast<lsn_t*>(data + len);
1518  }
1519 }
1520 
1522  return *(const_cast<btree_page_h*>(this)->emlsn_address(pos));
1523 }
1524 
1526  *emlsn_address(pos) = lsn;
1527 }
1528 
1529 inline const lsn_t& btree_page_h::get_foster_emlsn() const {
1530  return page()->btree_foster_emlsn;
1531 }
1532 
1533 inline const lsn_t& btree_page_h::get_pid0_emlsn() const {
1534  return page()->btree_pid0_emlsn;
1535 }
1536 
1537 #endif // __BTREE_PAGE_H_H
bool is_leaf() const
Is associated page a leaf?
Definition: btree_page_h.h:1189
Definition: btree_page_h.h:1110
base_stat_t used_bs
Definition: btree_page_h.h:1111
PageID btree_root() const
Definition: btree_page_h.h:239
Definition: logdef_gen.h:353
cvec_t & put(const cvec_t &v, size_t offset, size_t nbytes)
w_keystr_len_t common_leading_bytes(const w_keystr_t &r) const
Definition: w_key.h:399
bool is_ghost(slotid_t slot) const
Returns if the specified record is a ghost record.
Definition: btree_page_h.h:1346
bool _ghost_record
Definition: btree_page_h.h:76
fixable_page_h * _source
Definition: btree_page_h.h:1055
smsize_t usable_space() const
Definition: btree_page_h.h:1356
Definition: basics.h:81
void copy_fence_high_key(w_keystr_t &buffer) const
Constructs w_keystr_t object containing the low-fence key of this page.
Definition: btree_page_h.h:318
size of region available to store items
Definition: btree_page.h:46
~btree_page_h()
Definition: btree_page_h.h:226
latch_mode_t _mode
Definition: fixable_page_h.h:255
base_stat_t unalloc_pg_cnt
Definition: btree_page_h.h:1152
const lsn_t & get_pid0_emlsn() const
Definition: btree_page_h.h:1533
cvec_t _elem
Definition: btree_page_h.h:87
size_t _element_offset(int slot) const
Definition: btree_page_h.h:1477
#define w_assert1(x)
Level 1 should not add significant extra time.
Definition: w_base.h:198
uint32_t smsize_t
Definition: basics.h:41
int compare_with_fence_high(const w_keystr_t &key) const
Definition: btree_page_h.h:1258
const unsigned char SIGN_POSINF
Definition: w_key.h:21
PageID * page_pointer_address(int offset)
Definition: btree_page_h.h:1325
int nghosts() const
Returns the number of ghosts records in this page.
Definition: btree_page_h.h:1241
w_base_t::base_stat_t base_stat_t
Definition: btree_page_h.h:16
int compare_with_fence_low_noprefix(const char *key, size_t key_len) const
used when the prefix part is already checked key/key_len must be WITHOUT prefix.
Definition: btree_page_h.h:1253
int16_t general_recordid_t
An integer to point to any record in B-tree pages.
Definition: basics.h:71
Page handle for B-Tree data page.
Definition: btree_page_h.h:185
void copy_fence_low_key(w_keystr_t &buffer) const
Constructs w_keystr_t object containing the low-fence key of this page.
Definition: btree_page_h.h:292
bool _bufferpool_managed
is our associated page managed by the buffer pool?
Definition: fixable_page_h.h:254
Definition: latch.h:80
uint32_t StoreID
Definition: basics.h:47
base_stat_t unique_cnt
Definition: btree_page_h.h:1091
const lsn_t & get_emlsn_general(general_recordid_t pos) const
Definition: btree_page_h.h:1521
bool is_ghost_record() const
Definition: btree_page_h.h:69
Key string class which can represent a few special values.
Definition: w_key.h:47
btrec_t()
Definition: btree_page_h.h:39
_internal
Definition: btree_page_h.h:872
int16_t get_prefix_length() const
Returns the length of prefix key (0 means no prefix compression).
Definition: btree_page_h.h:1205
bool fence_contains(const w_keystr_t &key) const
Definition: btree_page_h.h:1290
const cvec_t & elem() const
Definition: btree_page_h.h:56
q_ticket_t _Q_ticket
Definition: fixable_page_h.h:257
uint16_t key_length_t
A field to hold a B-tree key length.
Definition: btree_page_h.h:870
static int compare_bin_str(const void *str1, int len1, const void *str2, int len2)
Definition: w_key.h:386
The internal implementation class which actually implements the functions of btree_m.
Definition: btree_impl.h:66
bool is_insertion_skewed_right() const
Definition: btree_page_h.h:1310
base_stat_t int_pg_cnt
Definition: btree_page_h.h:1138
uint64_t get_key(int sf, int specificPrefix, int tspread)
Definition: ycsb.cpp:29
base_stat_t data_bs
Definition: btree_page_h.h:1085
const char * get_prefix_key() const
Returns the prefix which is removed from all entries in this page.
Definition: btree_page_h.h:1233
A constant vec_t (meaning things pointed to cannot be changed).
Definition: vec_t.h:95
bool construct_from_keystr(const void *keystr, w_keystr_len_t length)
Definition: w_key.h:320
int32_t base_stat_t
Definition: w_base.h:296
Definition: btree_page_h.h:1133
int _compare_key_noprefix(slotid_t slot, const void *key_noprefix, size_t key_len) const
returns compare(specified-key, key_noprefix)
Definition: btree_page_h.h:1487
const char * get_fence_high_key_noprefix() const
Definition: btree_page_h.h:1225
bool is_insertion_extremely_skewed_right() const
Definition: btree_page_h.h:1302
Definition: logrec_support.h:19
size_t copy_to(void *p, size_t limit=0x7fffffff) const
int level() const
Returns 1 if leaf, >1 if non-leaf.
Definition: btree_page_h.h:1181
bool is_node() const
Returns if this page is NOT a leaf.
Definition: btree_page_h.h:1197
uint16_t poor_man_key
Poor man&#39;s normalized key type.
Definition: btree_page_h.h:961
btree_lf_stats_t()
Definition: btree_page_h.h:1093
btree_stats_t()
Definition: btree_page_h.h:1155
Represents a record in BTree.
Definition: btree_page_h.h:37
poor_man_key _extract_poor_man_key(const void *trunc_key, size_t trunc_key_len) const
Returns the value of poor-man&#39;s normalized key for the given key string WITHOUT prefix.
Definition: btree_page_h.h:1425
int compare_with_fence_low(const w_keystr_t &key) const
Definition: btree_page_h.h:1245
PageID i
Definition: btree_page_h.h:214
bool is_fence_low_infimum() const
Returns if the low-fence key is infimum.
Definition: btree_page_h.h:297
uint32_t PageID
Definition: basics.h:45
key_length_t pack_scratch_t
type of scratch space needed by _pack_leaf_record
Definition: btree_page_h.h:891
btree_page * page() const
Definition: btree_page_h.h:195
base_stat_t unlink_pg_cnt
Definition: btree_page_h.h:1139
size of all header fields combined
Definition: btree_page.h:41
uint16_t deserialize16_ho(const void *buf)
Definition: w_endian.h:82
poor_man_key _poor(int slot) const
Return poor man&#39;s key data for given slot.
Definition: btree_page_h.h:1455
Log Sequence Number. See Log Sequence Numbers (LSN).
Definition: lsn.h:243
btrec_t & operator=(const btrec_t &)
fixable_page_h & operator=(fixable_page_h &p)
Definition: fixable_page_h.h:45
int _pack_fence_rec(cvec_t &out, const w_keystr_t &low, const w_keystr_t &high, const w_keystr_t &chain, int new_prefix_len) const
Definition: btree_page_h.h:1387
bool is_leaf_parent() const
Definition: btree_page_h.h:1193
Handle class for pages that may be fixed (i.e., paged in by the main buffer manager, zero::buffer_pool::BufferPool)
Definition: fixable_page_h.h:26
base_stat_t entry_cnt
Definition: btree_page_h.h:1089
bool is_constructed() const
Definition: w_key.h:382
void _pack_leaf_record(cvec_t &out, pack_scratch_t &out_scratch, const cvec_t &trunc_key, const char *element, size_t element_len) const
Definition: btree_page_h.h:1380
btree page
Definition: generic_page.h:90
void set_emlsn_general(general_recordid_t pos, const lsn_t &lsn)
Sets the Expected Child LSN of pid0, foster-child, or real child.
Definition: btree_page_h.h:1525
Definition: btree_page_h.h:212
int16_t get_fence_high_length_noprefix() const
Returns the length of high fence key without prefix.
Definition: btree_page_h.h:313
void _pack_node_record(cvec_t &out, const cvec_t &trunc_key, const lsn_t &emlsn) const
Definition: btree_page_h.h:1365
borrowed_btree_page_h(fixable_page_h *source)
Definition: btree_page_h.h:1058
base_stat_t key_bs
Definition: btree_page_h.h:1084
~borrowed_btree_page_h()
Definition: btree_page_h.h:1069
int compare_keystr(const void *keystr, w_keystr_len_t length) const
Definition: w_key.h:428
void _pack_leaf_record_prefix(cvec_t &out, pack_scratch_t &out_scratch, const cvec_t &trunc_key) const
Definition: btree_page_h.h:1372
Definition: logdef_gen.h:322
base_stat_t entry_overhead_bs
Definition: btree_page_h.h:1086
size_t size() const
returns # bytes this vector references
Definition: vec_t.h:174
Return code for most functions and methods.
Definition: w_rc.h:87
base_stat_t leaf_pg_cnt
Definition: btree_page_h.h:1137
~btrec_t()
Definition: btree_page_h.h:43
int16_t slotid_t
Definition: basics.h:53
w_keystr_len_t get_length_as_keystr() const
Definition: w_key.h:508
const char * _node_key_noprefix(slotid_t slot, size_t &len) const
Definition: btree_page_h.h:1469
const unsigned char SIGN_NEGINF
Definition: w_key.h:17
btree_int_stats_t()
Definition: btree_page_h.h:1115
std::ostream & operator<<(std::ostream &os, const ConfigFile &cf)
Definition: confparser.cpp:83
void reserve_ghost(const w_keystr_t &key, size_t element_length)
Definition: btree_page_h.h:721
lsn_t * emlsn_address(general_recordid_t pos)
Definition: btree_page_h.h:1504
const w_keystr_t & key() const
Definition: btree_page_h.h:52
btree_int_stats_t int_pg
Definition: btree_page_h.h:1135
PageID root() const
Returns root page; used for recovery.
Definition: btree_page_h.h:1177
base_stat_t unused_bs
Definition: btree_page_h.h:1087
btree_page_h & operator=(btree_page_h &p)
Definition: btree_page_h.h:228
bool is_fence_high_supremum() const
Returns if the high-fence key is supremum.
Definition: btree_page_h.h:325
void copy_chain_fence_high_key(w_keystr_t &buffer) const
Constructs w_keystr_t object containing the low-fence key of this page.
Definition: btree_page_h.h:336
btree_page_h(const btree_page_h &p)
Definition: btree_page_h.h:224
Definition: btree_page_h.h:1082
const char * _leaf_key_noprefix(slotid_t slot, size_t &len) const
Definition: btree_page_h.h:1460
size_t _predict_leaf_data_length(int trunc_key_length, int element_length) const
Definition: btree_page_h.h:1420
base_stat_t level_cnt
Definition: btree_page_h.h:1153
size_t get_rec_space(int slot) const
Definition: btree_page_h.h:1341
const char * get_fence_low_key() const
Returns the low fence key, which is same OR smaller than all entries in this page and its descendants...
Definition: btree_page_h.h:1221
const char * get_chain_fence_high_key() const
Returns the high fence key of foster chain.
Definition: btree_page_h.h:1229
bool is_insertion_skewed_left() const
Definition: btree_page_h.h:1314
const lsn_t & get_foster_emlsn() const
Definition: btree_page_h.h:1529
PageID child_opaqueptr(slotid_t slot) const
Return the opaque child pointer of record in slot.
Definition: btree_page_h.h:1318
lsn_t _child_emlsn
Definition: btree_page_h.h:85
smsize_t used_space() const
Definition: btree_page_h.h:1351
smsize_t elen() const
Definition: btree_page_h.h:48
btree_lf_stats_t leaf_pg
Definition: btree_page_h.h:1134
const void * buffer_as_keystr() const
Definition: w_key.h:504
void search(const w_keystr_t &key, bool &found_key, slotid_t &return_slot) const
Definition: btree_page_h.h:600
PageID get_foster_opaqueptr() const
Returns opaque pointer of B-link page (0 if not linked).
Definition: btree_page_h.h:1201
generic_page * get_generic_page() const
return pointer to underlying page
Definition: generic_page.h:142
w_keystr_t _key
Definition: btree_page_h.h:78
Specialized variant of btree_page_h that borrows a B-tree page from a fixable_page_h.
Definition: btree_page_h.h:1054
btree_page_h()
Definition: btree_page_h.h:222
generic_page * _pp
The actual page we are handling; may be NULL for fixable pages.
Definition: generic_page.h:173
Definition: btree_logrec.h:140
PageID pid0_opaqueptr() const
Returns left-most opaque pointer (used only in non-leaf nodes).
Definition: btree_page_h.h:1185
static smsize_t max_entry_size
Definition: btree_page_h.h:862
int nrecs() const
Returns the number of records in this page.
Definition: btree_page_h.h:1237
int16_t get_fence_high_length() const
Returns the length of high fence key with prefix.
Definition: btree_page_h.h:1213
PageID _child
Definition: btree_page_h.h:80
base_stat_t hdr_bs
Definition: btree_page_h.h:1083
PageID child() const
returns the opaque version
Definition: btree_page_h.h:61
base_stat_t unused_bs
Definition: btree_page_h.h:1113
B-tree page.
Definition: btree_page.h:518
int16_t get_fence_low_length() const
Returns the length of low fence key.
Definition: btree_page_h.h:1209
int compare_with_chain_fence_high(const w_keystr_t &key) const
Definition: btree_page_h.h:1282
int16_t get_chain_fence_high_length() const
Returns the length of high fence key of foster chain.
Definition: btree_page_h.h:1217
Definition: basics.h:83
const lsn_t & child_emlsn() const
Definition: btree_page_h.h:65
Definition: logdef_gen.h:309
Definition: btree_page_h.h:205
int compare_with_fence_high_noprefix(const char *key, size_t key_len) const
used when the prefix part is already checked key/key_len must be WITHOUT prefix.
Definition: btree_page_h.h:1278