Zero  0.1.0
log_core.h
Go to the documentation of this file.
1 /*
2  * (c) Copyright 2011-2014, Hewlett-Packard Development Company, LP
3  */
4 
5 
6 /* -*- mode:C++; c-basic-offset:4 -*-
7  Shore-MT -- Multi-threaded port of the SHORE storage manager
8 
9  Copyright (c) 2007-2009
10  Data Intensive Applications and Systems Labaratory (DIAS)
11  Ecole Polytechnique Federale de Lausanne
12 
13  All Rights Reserved.
14 
15  Permission to use, copy, modify and distribute this software and
16  its documentation is hereby granted, provided that both the
17  copyright notice and this permission notice appear in all copies of
18  the software, derivative works or modified versions, and any
19  portions thereof, and that both notices appear in supporting
20  documentation.
21 
22  This code is distributed in the hope that it will be useful, but
23  WITHOUT ANY WARRANTY; without even the implied warranty of
24  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
25  DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
26  RESULTING FROM THE USE OF THIS SOFTWARE.
27 */
28 
29 /*<std-header orig-src='shore' incl-file-exclusion='SRV_LOG_H'>
30 
31  $Id: log_core.h,v 1.11 2010/09/21 14:26:19 nhall Exp $
32 
33 SHORE -- Scalable Heterogeneous Object REpository
34 
35 Copyright (c) 1994-99 Computer Sciences Department, University of
36  Wisconsin -- Madison
37 All Rights Reserved.
38 
39 Permission to use, copy, modify and distribute this software and its
40 documentation is hereby granted, provided that both the copyright
41 notice and this permission notice appear in all copies of the
42 software, derivative works or modified versions, and any portions
43 thereof, and that both notices appear in supporting documentation.
44 
45 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY
46 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS
47 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND
48 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49 
50 This software was developed with support by the Advanced Research
51 Project Agency, ARPA order number 018 (formerly 8230), monitored by
52 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518.
53 Further funding for this work was provided by DARPA through
54 Rome Research Laboratory Contract No. F30602-97-2-0247.
55 
56 */
57 
58 #ifndef __LOG_CORE_H
59 #define __LOG_CORE_H
60 
61 #include "w_defines.h"
62 
63 /* -- do not edit anything above this line -- </std-header>*/
64 
65 #include "AtomicCounter.hpp"
66 #include <vector> // only for _collect_single_page_recovery_logs()
67 #include <limits>
68 
69 // in sm_base for the purpose of log callback function argument type
70 class partition_t; // forward
71 
72 class sm_options;
73 class ConsolidationArray;
74 struct CArraySlot;
75 class PoorMansOldestLsnTracker; // NOTE(review): lines 75, 78 and 79 are absent from this listing; names come from its member index, line placement is inferred - confirm
76 class plog_xct_t;
77 class ticker_thread_t;
78 class fetch_buffer_loader_t; // used by log_core::_fetch_buf_loader
79 class flush_daemon_thread_t; // used by log_core::_flush_daemon
80 
81 #include "partition.h"
82 #include "mcs_lock.h"
83 #include "tatas.h"
84 #include "log_storage.h"
85 #include "stopwatch.h"
86 
87 class log_core {
88 public:
89  log_core(const sm_options&);
90 
91  virtual ~log_core();
92 
93  rc_t init();
94 
95  static const std::string IMPL_NAME;
96 
97  rc_t insert(logrec_t& r, lsn_t* l = nullptr);
98 
99  rc_t flush(const lsn_t& lsn, bool block = true, bool signal = true, bool* ret_flushed = nullptr);
100 
101  rc_t flush_all(bool block = true) {
102  return flush(curr_lsn().advance(-1), block);
103  }
104 
105  rc_t compensate(const lsn_t& orig_lsn, const lsn_t& undo_lsn);
106 
107  rc_t fetch(lsn_t& lsn, void* buf, lsn_t* nxt, const bool forward);
108 
109  bool fetch_direct(lsn_t lsn, logrec_t*& lr, lsn_t& prev_lsn);
110 
111  void shutdown();
112 
113  rc_t truncate();
114 
115  lsn_t curr_lsn() const {
116  return _curr_lsn;
117  }
118 
119  lsn_t durable_lsn() const {
120  return _durable_lsn;
121  }
122 
123  void start_flush_daemon();
124 
125  long segsize() const {
126  return _segsize;
127  }
128 
129  void flush_daemon();
130 
131  lsn_t flush_daemon_work(lsn_t old_mark);
132 
134 
137 
138  // log buffer segment size = 128 MB
139  enum {
141  };
142 
143  // Functions delegated to log_storage (CS TODO)
144  string make_log_name(uint32_t p) {
145  return _storage->make_log_name(p);
146  }
147 
149  return _storage;
150  }
151 
153  return _oldest_lsn_tracker;
154  }
155 
157 
158  static lsn_t first_lsn(uint32_t pnum) {
159  return lsn_t(pnum, 0);
160  }
161 
163  return _page_img_compression;
164  }
165 
166 protected:
167 
168  char* _buf; // log buffer: _segsize buffer into which
169  // inserts copy log records with log_core::insert
170 
174  vector<char*> _fetch_buffers;
175 
177 
178  uint32_t _fetch_buf_last;
179 
181 
183 
184  shared_ptr<fetch_buffer_loader_t> _fetch_buf_loader;
185 
187 
189 
191 
192  // Set of pointers into _buf (circular log buffer)
193  // and associated lsns. See detailed comments at log_core::insert
194  struct epoch {
195  lsn_t base_lsn; // lsn of _buf[0] for this epoch
196 
197  long base; // absolute position of _buf[0] (absolute meaning
198  // relative to the beginning of log.1)
199  long start; // offset from _buf[0] of this epoch
200  long end; // offset into log buffers _buf[0] of tail of
201  // log. Wraps modulo log buffer size, aka segsize.
203  : base_lsn(lsn_t::null),
204  base(0),
205  start(0),
206  end(0) {}
207 
208  epoch(lsn_t l, long b, long s, long e)
209  : base_lsn(l),
210  base(b),
211  start(s),
212  end(e) {
213  w_assert1(e >= s);
214  }
215 
216  epoch volatile* vthis() {
217  return this;
218  }
219  };
220 
226 
228 
230 
231  void _acquire_buffer_space(CArraySlot* info, long size);
232 
233  lsn_t _copy_to_buffer(logrec_t& rec, long pos, long size, CArraySlot* info);
234 
235  bool _update_epochs(CArraySlot* info);
236 
237  rc_t _join_carray(CArraySlot*& info, long& pos, int32_t size);
238 
239  rc_t _leave_carray(CArraySlot* info, int32_t size);
240 
241  void _copy_raw(CArraySlot* info, long& pos, const char* data, size_t size);
242 
246 
248 
249  enum {
251  };
252 
253  long _start; // byte number of oldest unwritten byte
254  long start_byte() const {
255  return _start;
256  }
257 
258  long _end; // byte number of insertion point
259  long end_byte() const {
260  return _end;
261  }
262 
263  long _segsize; // log buffer size
264 
266 
269  /*
270  * See src/internals.h, section LOG_M_INTERNAL
271  Divisions:
272 
273  Physical layout:
274 
275  The log consists of an unbounded number of "partitions" each
276  consisting of a fixed number of "segments." A partition is the
277  largest file that will be created and a segment is the size of the
278  in-memory buffer. Segments are further divided into "blocks" which
279  are the unit of I/O.
280 
281  Threads insert "entries" into the log (log records).
282 
283  One or more entries make up an "epoch" (data that will be flushed
284  using a single I/O). Epochs normally end at the end of a segment.
285  The log flush daemon constantly flushes any unflushed portion of
286  "valid" epochs. (An epoch is valid if its end > start.)
287  When an epoch reaches the end of a segment, the final log entry
288  will usually spill over into the next segment and the next
289  entry will begin a new epoch at a non-zero
290  offset of the new segment. However, a log entry which would spill
291  over into a new partition will begin a new epoch and join it.
292  Log records do not span partitions.
293  */
294 
295  /* FRJ: Partitions are not protected by either the insert or flush
296  mutex, but are instead managed separately using a combination
297  of mutex and reference counts. We do this because read
298  operations (e.g. fetch) need not impact either inserts or
299  flushes because (by definition) we read only already-written
300  data, which insert/flush never touches.
301 
302  Any time we change which file a partition_t points at (via open
303  or close), we must acquire the partition mutex. Each call to
304  open() increments a reference count which will be decremented
305  by a matching call to close(). Once a partition is open threads
306  may safely use it without the mutex because it will not be
307  closed until the ref count goes to zero. In particular, log
308  inserts do *not* acquire the partition mutex unless they need
309  to change the curr_partition.
310 
311  A thread should always acquire the partition mutex last. This
312  should happen naturally, since log_m acquires insert/flush
313  mutexen and srv_log acquires the partition mutex.
314  */
315  char _padding[CACHELINE_SIZE];
318  char _padding2[CACHELINE_TATAS_PADDING];
321  char _padding3[CACHELINE_TATAS_PADDING];
325  char _padding4[CACHELINE_MCS_PADDING];
328  // paired with _wait_cond, _flush_cond
329  pthread_mutex_t _wait_flush_lock;
330 
331  pthread_cond_t _wait_cond; // paired with _wait_flush_lock
332  pthread_cond_t _flush_cond; // paird with _wait_flush_lock
333 
334  bool _waiting_for_flush; // protected by log_m::_wait_flush_lock
335 
337 
340 
342 
348 
355 
358 
366 
372  bool _should_group_commit(unsigned long write_size);
373 
383 
384  bool directIO;
385 }; // log_core
386 
387 
394 class log_i { // Log-scan iterator.
395 public:
397  log_i(log_core& l, const lsn_t& lsn, const bool forward = true); // start a scan of log l at lsn
398 
399  ~log_i();
400 
402  bool xct_next(lsn_t& lsn, logrec_t*& r);
403 
404  bool xct_next(lsn_t& lsn, logrec_t& r);
405 
407  w_rc_t& get_last_rc(); // return code from the last next() call
408 
409 private:
410  log_core& log; // log being scanned (bound in the constructor)
411 
412  lsn_t cursor; // initialised from the constructor's lsn argument
413 
414  w_rc_t last_rc; // verified in the destructor, exposed by get_last_rc()
415 
416  bool forward_scan; // initialised from the constructor's forward argument
417 }; // log_i
418 
419 inline log_i::log_i(log_core& l, const lsn_t& lsn, const bool forward) : // Default: true for forward scan
420  log(l), // log to scan
421  cursor(lsn), // scan position starts at the caller-supplied lsn
422  forward_scan(forward) {} // no other member appears in this initializer list
423 
424 inline
425 log_i::~log_i() {
426  last_rc.verify(); // run w_rc_t::verify() on the stored return code when the iterator is destroyed
427 }
428 
429 inline w_rc_t&
430 log_i::get_last_rc() { // Get the return code from the last next() call (returned by reference).
431  return last_rc;
432 }
433 /*<std-footer incl-file-exclusion='LOG_H'> -- do not edit anything below this line -- */
434 
435 #endif // __LOG_CORE_H /*</std-footer>*/
void flush_daemon()
Log-flush daemon driver.
Definition: log_core.cpp:1031
uint32_t _fetch_buf_last
Definition: log_core.h:178
long start_byte() const
Definition: log_core.h:254
rc_t flush_all(bool block=true)
Definition: log_core.h:101
Definition: log_storage.h:160
w_rc_t & get_last_rc()
Get the return code from the last next() call.
Definition: log_core.h:430
bool _should_group_commit(unsigned long write_size)
Definition: log_core.cpp:1088
long segsize() const
Definition: log_core.h:125
Definition: log_core.cpp:192
rc_t insert(logrec_t &r, lsn_t *l=nullptr)
Definition: log_core.cpp:896
One slot in ConsolidationArray.
Definition: log_carray.h:131
#define w_assert1(x)
Level 1 should not add significant extra time.
Definition: w_base.h:198
bool forward_scan
Definition: log_core.h:416
void shutdown()
Definition: log_core.cpp:371
lsn_t base_lsn
Definition: log_core.h:195
lintel::Atomic< bool > _flush_daemon_running
Definition: log_core.h:341
lsn_t get_oldest_active_lsn()
Definition: log_core.cpp:1389
PoorMansOldestLsnTracker * get_oldest_lsn_tracker()
Definition: log_core.h:152
lsn_t _durable_lsn
Definition: log_core.h:190
lsn_t curr_lsn() const
Definition: log_core.h:115
long base
Definition: log_core.h:197
lsn_t _fetch_buf_end
Definition: log_core.h:182
PoorMansOldestLsnTracker * _oldest_lsn_tracker
Definition: log_core.h:247
pthread_mutex_t _wait_flush_lock
Definition: log_core.h:329
rc_t _join_carray(CArraySlot *&info, long &pos, int32_t size)
Definition: log_core.cpp:829
log_i(log_core &l, const lsn_t &lsn, const bool forward=true)
Start a scan of the given log at the given log sequence number.
Definition: log_core.h:419
const size_t CACHELINE_TATAS_PADDING
Definition: tatas.h:140
Definition: log_core.h:194
void verify() const
Definition: w_rc.h:554
long _start
Definition: log_core.h:253
bool directIO
Definition: log_core.h:384
Header file for lintel::Atomic class.
rc_t _leave_carray(CArraySlot *info, int32_t size)
Definition: log_core.cpp:876
void _copy_raw(CArraySlot *info, long &pos, const char *data, size_t size)
Definition: log_core.cpp:923
vector< char * > _fetch_buffers
Definition: log_core.h:174
lsn_t _flush_lsn
Definition: log_core.h:265
Start-up parameters for the storage engine. See OPTIONS.
Definition: sm_options.h:24
void _acquire_buffer_space(CArraySlot *info, long size)
Definition: log_core.cpp:537
Log-scan iterator.
Definition: log_core.h:394
Definition: log_core.h:87
log_core & log
Definition: log_core.h:410
Definition: log_core.cpp:178
rc_t flush(const lsn_t &lsn, bool block=true, bool signal=true, bool *ret_flushed=nullptr)
Definition: log_core.cpp:984
epoch(lsn_t l, long b, long s, long e)
Definition: log_core.h:208
stopwatch_t _group_commit_timer
Timer object to keep track of group commit timeout.
Definition: log_core.h:357
rc_t compensate(const lsn_t &orig_lsn, const lsn_t &undo_lsn)
Definition: log_core.cpp:1229
Definition: partition.h:69
Represents a transactional log record.
Definition: logrec.h:143
flush_daemon_thread_t * _flush_daemon
Definition: log_core.h:336
w_rc_t last_rc
Definition: log_core.h:414
bool fetch_direct(lsn_t lsn, logrec_t *&lr, lsn_t &prev_lsn)
Definition: log_core.cpp:350
unsigned get_page_img_compression()
Definition: log_core.h:162
Log Sequence Number. See Log Sequence Numbers (LSN).
Definition: lsn.h:243
const T max(const T x, const T y)
Definition: w_minmax.h:45
bool _waiting_for_flush
Definition: log_core.h:334
const size_t CACHELINE_MCS_PADDING
Definition: mcs_lock.h:200
void discard_fetch_buffers(partition_number_t recycled=std::numeric_limits< partition_number_t >::max())
Definition: log_core.cpp:1363
long _end
Definition: log_core.h:258
rc_t init()
Definition: log_core.cpp:500
epoch()
Definition: log_core.h:202
lsn_t _fetch_buf_begin
Definition: log_core.h:180
a timer object.
Definition: stopwatch.h:34
Definition: log_storage.h:84
lsn_t _curr_lsn
Definition: log_core.h:188
Return code for most functions and methods.
Definition: w_rc.h:87
ConsolidationArray * _carray
Definition: log_core.h:347
string make_log_name(partition_number_t pnum) const
Definition: log_storage.cpp:485
epoch _cur_epoch
Definition: log_core.h:227
long end_byte() const
Definition: log_core.h:259
Definition: log_core.h:250
epoch _old_epoch
Definition: log_core.h:229
pthread_cond_t _flush_cond
Definition: log_core.h:332
An MCS queuing spinlock.
Definition: mcs_lock.h:61
lsn_t flush_daemon_work(lsn_t old_mark)
Flush unflushed-portion of log buffer.
Definition: log_core.cpp:1116
log_storage * get_storage()
Definition: log_core.h:148
unsigned _page_img_compression
Definition: log_core.h:382
bool _update_epochs(CArraySlot *info)
Definition: log_core.cpp:772
epoch _buf_epoch
Definition: log_core.h:225
Definition: log_core.h:140
ticker_thread_t * _ticker
Definition: log_core.h:186
A test-and-test-and-set spinlock.
Definition: tatas.h:25
rc_t fetch(lsn_t &lsn, void *buf, lsn_t *nxt, const bool forward)
Definition: log_core.cpp:228
The implementation class of Consolidation Array.
Definition: log_carray.h:203
long _segsize
Definition: log_core.h:263
long start
Definition: log_core.h:199
lsn_t _copy_to_buffer(logrec_t &rec, long pos, long size, CArraySlot *info)
Definition: log_core.cpp:760
long _group_commit_timeout
Definition: log_core.h:365
uint32_t _fetch_buf_first
Definition: log_core.h:176
log_storage * _storage
Definition: log_core.h:245
void start_flush_daemon()
Definition: log_core.cpp:206
mcs_lock _insert_lock
Definition: log_core.h:324
smlevel_0::partition_number_t partition_number_t
Definition: log_storage.h:72
Definition: log_core.cpp:92
lsn_t cursor
Definition: log_core.h:412
lsn_t durable_lsn() const
Definition: log_core.h:119
shared_ptr< fetch_buffer_loader_t > _fetch_buf_loader
Definition: log_core.h:184
rc_t load_fetch_buffers()
Definition: log_core.cpp:1299
tatas_lock _comp_lock
Definition: log_core.h:320
static const std::string IMPL_NAME
Definition: log_core.h:95
long end
Definition: log_core.h:200
virtual ~log_core()
Definition: log_core.cpp:515
char * _buf
Definition: log_core.h:168
lintel::Atomic< bool > _shutting_down
Definition: log_core.h:339
tatas_lock _flush_lock
Definition: log_core.h:317
~log_i()
Definition: log_core.h:425
epoch volatile * vthis()
Definition: log_core.h:216
This class is a strawman implementation of tracking the oldest active transaction in the system...
Definition: log_lsn_tracker.h:24
size_t _group_commit_size
Definition: log_core.h:354
log_core(const sm_options &)
Definition: log_core.cpp:404
rc_t truncate()
Definition: log_core.cpp:734
pthread_cond_t _wait_cond
Definition: log_core.h:331
string make_log_name(uint32_t p)
Definition: log_core.h:144
static lsn_t first_lsn(uint32_t pnum)
Definition: log_core.h:158
const size_t CACHELINE_SIZE
CPU Cache line size in bytes.
Definition: w_defines.h:183