buf0flu.cc

skipping to change at line 133
check in debug builds. */
	if (--buf_flush_validate_count > 0) {
		return(TRUE);
	}

	buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
	return(buf_flush_validate_low(buf_pool));
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/*******************************************************************//**
Sets hazard pointer during flush_list iteration. */
UNIV_INLINE
void
buf_flush_set_hp(
/*=============*/
	buf_pool_t*		buf_pool,/*!< in/out: buffer pool instance */
	const buf_page_t*	bpage)	/*!< in: buffer control block */
{
	ut_ad(buf_flush_list_mutex_own(buf_pool));
	ut_ad(buf_pool->flush_list_hp == NULL || bpage == NULL);
	ut_ad(!bpage || buf_page_in_file(bpage));
	ut_ad(!bpage || bpage->in_flush_list);
	ut_ad(!bpage || buf_pool_from_bpage(bpage) == buf_pool);

	buf_pool->flush_list_hp = bpage;
}
/*******************************************************************//**
Checks if the given block is a hazard pointer.
@return true if bpage is hazard pointer */
UNIV_INLINE
bool
buf_flush_is_hp(
/*============*/
	buf_pool_t*		buf_pool,/*!< in: buffer pool instance */
	const buf_page_t*	bpage)	/*!< in: buffer control block */
{
	ut_ad(buf_flush_list_mutex_own(buf_pool));

	return(buf_pool->flush_list_hp == bpage);
}
/*******************************************************************//**
Whenever we move a block in flush_list (either to remove it or to
relocate it) we check the hazard pointer set by some other thread
doing the flush list scan. If the hazard pointer is the same as the
one we are about to move then we set it to NULL to force a rescan
in the thread doing the batch. */
UNIV_INLINE
void
buf_flush_update_hp(
/*================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_page_t*	bpage)		/*!< in: buffer control block */
{
	ut_ad(buf_flush_list_mutex_own(buf_pool));

	if (buf_flush_is_hp(buf_pool, bpage)) {
		buf_flush_set_hp(buf_pool, NULL);
		MONITOR_INC(MONITOR_FLUSH_HP_RESCAN);
	}
}
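/* The three helpers above implement a simple hazard-pointer protocol for
the flush list: one scanner at a time (the ut_ad in buf_flush_set_hp()
enforces this) publishes the next node it intends to visit, and any
remover that unlinks that node resets the pointer, forcing a rescan.
A minimal standalone sketch of the same protocol follows, with
hypothetical types (Page, FlushList) in place of buf_page_t and the
buf_pool flush list, and std::mutex in place of the flush list mutex;
this is an illustration, not InnoDB code: */
#if 0
#include <cstddef>
#include <mutex>

struct Page {
	Page*	newer;		/* neighbour towards the list head */
};

struct FlushList {
	Page*		tail;	/* oldest page; scans start here */
	Page*		hp;	/* hazard pointer, guarded by m */
	std::mutex	m;
};

/* Scanner: walks from the tail towards newer pages, dropping the
mutex around the actual flush, like buf_do_flush_list_batch(). The
page currently being flushed is assumed pinned (io-fixed in InnoDB),
so only the saved `prev` pointer can go stale. */
void scan(FlushList& fl, void (*flush_page)(Page*))
{
	std::unique_lock<std::mutex> lock(fl.m);

	for (Page* page = fl.tail; page != NULL; ) {
		Page*	prev = page->newer;

		fl.hp = prev;		/* publish the next victim */
		lock.unlock();

		flush_page(page);	/* the list may change meanwhile */

		lock.lock();
		if (fl.hp != prev) {
			/* A remover unlinked `prev` and reset the
			hazard pointer: restart from the tail. */
			page = fl.tail;
		} else {
			page = prev;
		}
		fl.hp = NULL;
	}
}

/* Remover: the counterpart of buf_flush_update_hp(). It must run
under the same mutex and reset the hazard pointer before unlinking. */
void remove_page(FlushList& fl, Page* page)
{
	std::lock_guard<std::mutex> lock(fl.m);

	if (fl.hp == page) {
		fl.hp = NULL;	/* force the scanner to rescan */
	}
	/* ... unlink `page`, fixing the tail and neighbour links ... */
}
#endif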
/******************************************************************//**
Inserts a block in the flush_rbt and returns a pointer to its
predecessor, or NULL if there is no predecessor. The ordering is
maintained on the basis of the <oldest_modification, space, offset> key.
@return pointer to the predecessor or NULL if no predecessor. */
static
buf_page_t*
buf_flush_insert_in_flush_rbt(
/*==========================*/
	buf_page_t*	bpage)	/*!< in: bpage to be inserted. */
skipping to change at line 528
" InnoDB: Error: buffer block state %lu" " InnoDB: Error: buffer block state %lu"
" in the LRU list!\n", " in the LRU list!\n",
(ulong) buf_page_get_state(bpage)); (ulong) buf_page_get_state(bpage));
ut_print_buf(stderr, bpage, sizeof(buf_page_t)); ut_print_buf(stderr, bpage, sizeof(buf_page_t));
putc('\n', stderr); putc('\n', stderr);
return(FALSE); return(FALSE);
} }
/********************************************************************//**
Returns true if the block is modified and ready for flushing.
@return true if can flush immediately */
UNIV_INTERN
bool
buf_flush_ready_for_flush(
/*======================*/
	buf_page_t*	bpage,	/*!< in: buffer control block, must be
				buf_page_in_file(bpage) */
	buf_flush_t	flush_type)/*!< in: type of flush */
{
#ifdef UNIV_DEBUG
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
	ut_ad(buf_pool_mutex_own(buf_pool));
#endif /* UNIV_DEBUG */

	ut_a(buf_page_in_file(bpage));
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_ad(flush_type < BUF_FLUSH_N_TYPES);

	if (bpage->oldest_modification == 0
	    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
		return(false);
	}

	ut_ad(bpage->in_flush_list);

	switch (flush_type) {
	case BUF_FLUSH_LIST:
		return(true);

	case BUF_FLUSH_LRU:
	case BUF_FLUSH_SINGLE_PAGE:
		/* Because any thread may call single page flush, even
		when owning locks on pages, to avoid deadlocks, we must
		make sure that it is not buffer fixed.
		The same holds true for LRU flush because a user thread
		may end up waiting for an LRU flush to end while
		holding locks on other pages. */
		return(bpage->buf_fix_count == 0);

	case BUF_FLUSH_N_TYPES:
		break;
	}

	ut_error;
	return(false);
}
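/* The rules above compress to a small decision table: a page must be
dirty and not io-fixed, and for LRU or single-page flushes it must also
not be buffer-fixed. A standalone restatement with a hypothetical plain
struct in place of buf_page_t (illustration only): */
#if 0
struct PageState {
	bool		dirty;		/* oldest_modification != 0 */
	bool		io_fixed;	/* io_fix != BUF_IO_NONE */
	unsigned	buf_fix_count;	/* concurrent users of the page */
};

enum FlushType { FLUSH_LIST, FLUSH_LRU, FLUSH_SINGLE_PAGE };

bool ready_for_flush(const PageState& p, FlushType t)
{
	if (!p.dirty || p.io_fixed) {
		return false;	/* clean, or I/O already in progress */
	}

	/* LRU and single-page flushes may be issued by threads that
	already hold page latches; requiring buf_fix_count == 0 avoids
	waiting on (and deadlocking with) such threads. */
	return t == FLUSH_LIST || p.buf_fix_count == 0;
}
#endif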
/********************************************************************//**
Remove a block from the flush list of modified blocks. */
UNIV_INTERN
void
buf_flush_remove(
/*=============*/
	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
{
skipping to change at line 632
	zip_size = page_zip_get_size(&bpage->zip);
	buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;

	bpage->oldest_modification = 0;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_update_hp(buf_pool, bpage);
	buf_flush_list_mutex_exit(buf_pool);
}
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage have already been
copied to dpage.
IMPORTANT: When this function is called bpage and dpage are not
exact copies of each other. For example, they both will have different
::state. Also the ::list pointers in dpage may be stale. We need to
skipping to change at line 711
	}

	/* Just an extra check. Previous in flush_list
	should be the same control block as in flush_rbt. */
	ut_a(!buf_pool->flush_rbt || prev_b == prev);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_update_hp(buf_pool, bpage);
	buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
buf_flush_write_complete(
/*=====================*/
	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
{
	buf_flush_t	flush_type;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(bpage);

	buf_flush_remove(bpage);

	flush_type = buf_page_get_flush_type(bpage);
	buf_pool->n_flush[flush_type]--;

	/* fprintf(stderr, "n pending flush %lu\n",
	buf_pool->n_flush[flush_type]); */

	if (buf_pool->n_flush[flush_type] == 0
	    && buf_pool->init_flush[flush_type] == FALSE) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}

	buf_dblwr_update(bpage, flush_type);
}
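/* The counter-plus-flag signalling above (n_flush[], init_flush[] and
the no_flush[] event) is a "wait until zero pending writes" pattern.
A hedged sketch of the same idea with standard C++ primitives in place
of os_event (hypothetical names; not InnoDB's actual API): */
#if 0
#include <condition_variable>
#include <mutex>

struct BatchState {
	std::mutex		m;
	std::condition_variable	no_flush;	/* like buf_pool->no_flush[] */
	unsigned		n_pending;	/* like buf_pool->n_flush[] */
	bool			init_flush;	/* batch still being queued */
};

/* Counterpart of buf_flush_write_complete(): one call per finished
write. init_flush keeps n_pending's transient zeroes, while the batch
initiator is still queueing writes, from waking the waiters early. */
void write_complete(BatchState& b)
{
	std::lock_guard<std::mutex> g(b.m);

	if (--b.n_pending == 0 && !b.init_flush) {
		/* The running flush batch has ended. */
		b.no_flush.notify_all();
	}
}

/* Counterpart of buf_flush_wait_batch_end(). */
void wait_batch_end(BatchState& b)
{
	std::unique_lock<std::mutex> g(b.m);

	while (b.n_pending != 0 || b.init_flush) {
		b.no_flush.wait(g);
	}
}
#endif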
#endif /* !UNIV_HOTBACKUP */

/********************************************************************//**
Calculate the checksum of a page from a compressed table and update
the page. */
UNIV_INTERN
void
buf_flush_update_zip_checksum(
/*==========================*/
	buf_frame_t*	page,	/*!< in/out: Page to update */
skipping to change at line 876
	this field because the file will not be readable by old
	versions of MySQL/InnoDB anyway (older than MySQL 5.6.3) */

	mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
			checksum);
}

#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
also when the doublewrite buffer is used, we must call
buf_dblwr_flush_buffered_writes after we have posted a batch of
writes! */
static
void
buf_flush_write_block_low(
/*======================*/
	buf_page_t*	bpage,		/*!< in: buffer block to write */
	buf_flush_t	flush_type,	/*!< in: type of flush */
	bool		sync)		/*!< in: true if sync IO request */
{
	ulint	zip_size	= buf_page_get_zip_size(bpage);
	page_t*	frame		= NULL;

#ifdef UNIV_DEBUG
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
	ut_ad(!buf_pool_mutex_own(buf_pool));
#endif

#ifdef UNIV_LOG_DEBUG
skipping to change at line 963
		buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
					   bpage->zip.data
					   ? &bpage->zip : NULL,
					   bpage->newest_modification);
		break;
	}

	if (!srv_use_doublewrite_buf || !buf_dblwr) {
		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
		       sync, buf_page_get_space(bpage), zip_size,
		       buf_page_get_page_no(bpage), 0,
		       zip_size ? zip_size : UNIV_PAGE_SIZE,
		       frame, bpage);
	} else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
		buf_dblwr_write_single_page(bpage, sync);
	} else {
		ut_ad(!sync);
		buf_dblwr_add_to_batch(bpage);
	}

	/* When doing single page flushing the IO is done synchronously
	and we flush the changes to disk only for the tablespace we
	are working on. */
	if (sync) {
		ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
		fil_flush(buf_page_get_space(bpage));
		buf_page_io_complete(bpage);
	}

	/* Increment the counter of I/O operations used
	for selecting LRU policy. */
	buf_LRU_stat_inc_io();
}
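/* The NOTE above is a real contract: when the doublewrite buffer is in
use, buf_dblwr_add_to_batch() only stages the write, and nothing is
guaranteed to reach the data files until buf_dblwr_flush_buffered_writes()
runs for the whole batch (buf_flush_common() does exactly that after
every LRU/flush-list batch). A hedged sketch of the calling pattern,
with a hypothetical page array: */
#if 0
void post_batch_sketch(buf_page_t** pages, ulint n_pages)
{
	for (ulint i = 0; i < n_pages; i++) {
		/* Stage the page; the doublewrite code flushes on its
		own only when its buffer fills up. */
		buf_dblwr_add_to_batch(pages[i]);
	}

	/* Write and fsync the doublewrite area, then post the real
	page writes. Without this call a partially staged batch could
	sit in memory indefinitely. */
	buf_dblwr_flush_buffered_writes();
}
#endif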
/********************************************************************//**
Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: in simulated aio we must call
os_aio_simulated_wake_handler_threads after we have posted a batch of
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
held upon entering this function, and they will be released by this
function. */
UNIV_INTERN
void
buf_flush_page(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_page_t*	bpage,		/*!< in: buffer control block */
	buf_flush_t	flush_type,	/*!< in: type of flush */
	bool		sync)		/*!< in: true if sync IO request */
{
	ib_mutex_t*	block_mutex;
	ibool		is_uncompressed;

	ut_ad(flush_type < BUF_FLUSH_N_TYPES);
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_page_in_file(bpage));
	ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE);

	block_mutex = buf_page_get_mutex(bpage);
	ut_ad(mutex_own(block_mutex));

	ut_ad(buf_flush_ready_for_flush(bpage, flush_type));

	buf_page_set_io_fix(bpage, BUF_IO_WRITE);

	buf_page_set_flush_type(bpage, flush_type);
skipping to change at line 1106
	oldest_modification != 0. Thus, it cannot be relocated in the
	buffer pool or removed from flush_list or LRU_list. */

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Flushing %u space %u page %u\n",
			flush_type, bpage->space, bpage->offset);
	}
#endif /* UNIV_DEBUG */

	buf_flush_write_block_low(bpage, flush_type, sync);
}
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/********************************************************************//**
Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: buf_pool->mutex and block->mutex must be held upon entering this
function, and they will be released by this function after flushing.
This is loosely based on buf_flush_batch() and buf_flush_page().
@return TRUE if the page was flushed and the mutexes released */
UNIV_INTERN
skipping to change at line 1133
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(mutex_own(&block->mutex));

	if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) {
		return(FALSE);
	}

	/* The following call will release the buffer pool and
	block mutex. */
	buf_flush_page(buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true);

	return(TRUE);
}
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/***********************************************************//**
Check if the page is in the buffer pool and can be flushed.
@return true if the page can be flushed. */
static
bool
buf_flush_check_neighbor(
/*=====================*/
	ulint		space,		/*!< in: space id */
	ulint		offset,		/*!< in: page offset */
	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
	bool		ret;

	ut_ad(flush_type == BUF_FLUSH_LRU
	      || flush_type == BUF_FLUSH_LIST);

	buf_pool_mutex_enter(buf_pool);
skipping to change at line 1196
/***********************************************************//**
Flushes to disk all flushable pages within the flush area.
@return number of pages flushed */
static
ulint
buf_flush_try_neighbors(
/*====================*/
	ulint		space,		/*!< in: space id */
	ulint		offset,		/*!< in: page offset */
	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
	ulint		n_flushed,	/*!< in: number of pages
					flushed so far in this batch */
	ulint		n_to_flush)	/*!< in: maximum number of pages
					we are allowed to flush */
{
	ulint		i;
	ulint		low;
	ulint		high;
	ulint		count = 0;
skipping to change at line 1317
			    && (i == offset || !bpage->buf_fix_count)) {

				/* We only try to flush those
				neighbors != offset where the buf fix
				count is zero, as we then know that we
				probably can latch the page without a
				semaphore wait. Semaphore waits are
				expensive because we must flush the
				doublewrite buffer before we start
				waiting. */

				buf_flush_page(buf_pool, bpage,
					       flush_type, false);

				ut_ad(!mutex_own(block_mutex));
				ut_ad(!buf_pool_mutex_own(buf_pool));

				count++;
				continue;

			} else {
				mutex_exit(block_mutex);
			}
		}
		buf_pool_mutex_exit(buf_pool);
	}
skipping to change at line 1354
@return TRUE if buf_pool mutex was released during this function.
This does not guarantee that some pages were written as well.
The number of pages written is added to *count. */
static
ibool
buf_flush_page_and_try_neighbors(
/*=============================*/
	buf_page_t*	bpage,		/*!< in: buffer control block,
					must be
					buf_page_in_file(bpage) */
	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
	ulint		n_to_flush,	/*!< in: number of pages to
					flush */
	ulint*		count)		/*!< in/out: number of pages
					flushed */
{
	ib_mutex_t*	block_mutex;
	ibool		flushed = FALSE;
#ifdef UNIV_DEBUG
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
skipping to change at line 1604
	ulint		min_n,		/*!< in: wished minimum number
					of blocks flushed (it is not
					guaranteed that the actual
					number is that big, though) */
	lsn_t		lsn_limit)	/*!< all blocks whose
					oldest_modification is smaller
					than this should be flushed (if
					their number does not exceed
					min_n) */
{
	ulint		count = 0;
	ulint		scanned = 0;

	ut_ad(buf_pool_mutex_own(buf_pool));

	/* Start from the end of the list looking for a suitable
	block to be flushed. */
	buf_flush_list_mutex_enter(buf_pool);
	ulint len = UT_LIST_GET_LEN(buf_pool->flush_list);

	/* In order not to degenerate this scan to O(n*n) we attempt to
	preserve the pointer to the previous block in the flush list. To
	do so we declare it a hazard pointer. Any thread working on the
	flush list must check the hazard pointer and if it is removing
	the same block then it must reset it. */
	for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
	     count < min_n && bpage != NULL && len > 0
	     && bpage->oldest_modification < lsn_limit;
	     ++scanned) {

		bool		flushed;
		buf_page_t*	prev;

		ut_a(bpage->oldest_modification > 0);
		ut_ad(bpage->in_flush_list);

		prev = UT_LIST_GET_PREV(list, bpage);
		buf_flush_set_hp(buf_pool, prev);
		buf_flush_list_mutex_exit(buf_pool);

		flushed = buf_flush_page_and_try_neighbors(
			bpage, BUF_FLUSH_LIST, min_n, &count);

		buf_flush_list_mutex_enter(buf_pool);

		ut_ad(flushed || buf_flush_is_hp(buf_pool, prev));

		if (!buf_flush_is_hp(buf_pool, prev)) {
			/* The hazard pointer was reset by some other
			thread. Restart the scan. */
			ut_ad(buf_flush_is_hp(buf_pool, NULL));
			bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
			len = UT_LIST_GET_LEN(buf_pool->flush_list);
		} else {
			bpage = prev;
			--len;
			buf_flush_set_hp(buf_pool, NULL);
		}

		ut_ad(!bpage || bpage->in_flush_list);
	}

	buf_flush_list_mutex_exit(buf_pool);

	MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED,
				     MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
				     MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
				     scanned);

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(count);
}
skipping to change at line 1681
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued */
static
ulint
buf_flush_batch(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
					then the caller must not own any
					latches on pages */
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	lsn_t		lsn_limit)	/*!< in: in the case of BUF_FLUSH_LIST
					all blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
skipping to change at line 1737

	return(count);
}
/******************************************************************//**
Gather the aggregated stats for both flush list and LRU list flushing */
static
void
buf_flush_common(
/*=============*/
	buf_flush_t	flush_type,	/*!< in: type of flush */
	ulint		page_count)	/*!< in: number of pages flushed */
{
	buf_dblwr_flush_buffered_writes();

	ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
skipping to change at line 1763

	srv_stats.buf_pool_flushed.add(page_count);
}
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
static
ibool
buf_flush_start(
/*============*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_flush[flush_type] > 0
	    || buf_pool->init_flush[flush_type] == TRUE) {

		/* There is already a flush batch of the same type running */

		buf_pool_mutex_exit(buf_pool);
skipping to change at line 1792

	return(TRUE);
}
/******************************************************************//**
End a buffer flush batch for LRU or flush list */
static
void
buf_flush_end(
/*==========*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	buf_pool->init_flush[flush_type] = FALSE;

	buf_pool->try_LRU_scan = TRUE;

	if (buf_pool->n_flush[flush_type] == 0) {
skipping to change at line 1818

	buf_pool_mutex_exit(buf_pool);
}
/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	buf_flush_t	type)		/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);

	if (buf_pool == NULL) {
		ulint	i;

		for (i = 0; i < srv_buf_pool_instances; ++i) {
			buf_pool_t*	buf_pool;
skipping to change at line 2021
				     scanned);

	if (!bpage) {
		/* Can't find a single flushable page. */
		buf_pool_mutex_exit(buf_pool);
		return(FALSE);
	}

	/* The following call will release the buffer pool and
	block mutex. */
	buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true);

	/* At this point the page has been written to the disk.
	As we are not holding the buffer pool or block mutex we
	cannot use bpage safely. It may have been plucked out
	of the LRU list by some other thread or it may even have
	been relocated in case of a compressed page. We need to start
	the scan of the LRU list again to remove the block from the LRU
	list and put it on the free list. */
	buf_pool_mutex_enter(buf_pool);
skipping to change at line 2079
UNIV_INTERN
ulint
buf_flush_LRU_tail(void)
/*====================*/
{
	ulint	total_flushed = 0;

	for (ulint i = 0; i < srv_buf_pool_instances; i++) {

		buf_pool_t*	buf_pool = buf_pool_from_array(i);
		ulint		scan_depth;

		/* srv_LRU_scan_depth can be an arbitrarily large value.
		We cap it with the current LRU size. */
		buf_pool_mutex_enter(buf_pool);
		scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
		buf_pool_mutex_exit(buf_pool);

		scan_depth = ut_min(srv_LRU_scan_depth, scan_depth);

		/* We divide LRU flush into smaller chunks because
		there may be user threads waiting for the flush to
		end in buf_LRU_get_free_block(). */
		for (ulint j = 0;
		     j < scan_depth;
		     j += PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE) {

			ulint	n_flushed = 0;

			/* Currently page_cleaner is the only thread
			that can trigger an LRU flush. It is possible
			that a batch triggered during the last iteration
			is still running. */
			if (buf_flush_LRU(buf_pool,
					  PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE,
					  &n_flushed)) {

				/* Only one batch is allowed per
				buffer pool instance. */
				buf_flush_wait_batch_end(
					buf_pool, BUF_FLUSH_LRU);
			}

			if (n_flushed) {
				total_flushed += n_flushed;
			} else {
				/* Nothing to flush */
				break;
			}
		}
	}

	if (total_flushed) {
		MONITOR_INC_VALUE_CUMULATIVE(
			MONITOR_LRU_BATCH_TOTAL_PAGE,
			MONITOR_LRU_BATCH_COUNT,
			MONITOR_LRU_BATCH_PAGES,
			total_flushed);
	}