/************************************************************************
 * implementation of pages

 STRUCTURE OF A PAGE

 +-------------------------------------------+-----------------------+--+
 | DATA SECTION                   +--------->| RID: (PAGE, 0)        |  |
 |          +-----------------+   |          +-----------------------+  |
 |      +-->| RID: (PAGE, 1)  |   |                                     |
 |      |   +-----------------+   |                                     |
 |      |                         |                                     |
 |      +-----------------+       |        +----------------------------+
 |                        |       |   +--->| RID: (PAGE, n)             |
 |                        |       |   |    +----------------------------+
 |======================================================================|
 |^ FREE SPACE            |       |   |                                 |
 |+-----------------------|-------|---|--------------------+            |
 |                        |       |   |                    |            |
 |          +-------------|-------|---+                    |            |
 |          |             |       |                        |            |
 |      +---|---+-----+---|---+---|---+--------------+-----|------+-----+
 |      | slotn | ... | slot1 | slot0 | num of slots | free space | LSN |
 +------+-------+-----+-------+-------+--------------+------------+-----+

 NOTE:
   - slots are zero indexed.
   - slots are implemented as (offset, length) pairs.

 Latching summary:

   Each page has an associated read/write lock.  This lock only
   protects the internal layout of the page, and the members of the
   page struct.  Here is how it is held in various circumstances:

     Record allocation:  Write lock
     Record read:        Read lock
     Read LSN:           Read lock
     Record write:       *READ LOCK*
     Write LSN:          Write lock

   Any circumstance where these locks are held during an I/O operation
   is a bug.

 $Id$

************************************************************************/
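
/* For orientation: the accessors used throughout this file (slot_ptr,
   slot_length_ptr, record_ptr, numslots_ptr, freespace_ptr, freelist_ptr,
   lsn_ptr, end_of_usable_space_ptr) are macros defined in page.h /
   slotted.h.  The sketch below is one plausible encoding consistent with
   the diagram above -- header fields packed at the end of the page, with
   the slot array growing downward in front of them.  It is illustrative
   only; the authoritative definitions live in the headers and may differ:

     // hypothetical layout sketch, not the real macros:
     // lsn_ptr(p)        = (lsn_t*)(p->memAddr + PAGE_SIZE) - 1
     // freespace_ptr(p)  = (int*)lsn_ptr(p) - 1
     // numslots_ptr(p)   = freespace_ptr(p) - 1
     // freelist_ptr(p)   = numslots_ptr(p) - 1
     // slot_ptr(p, n)        = freelist_ptr(p) - (2*(n) + 1)
     // slot_length_ptr(p, n) = freelist_ptr(p) - (2*(n) + 2)
     // record_ptr(p, n)      = p->memAddr + *slot_ptr(p, n)
     // end_of_usable_space_ptr(p) = lowest address currently occupied
     //                              by this metadata
*/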

#include "../page.h"
#include "../blobManager.h"
#include "slotted.h"
#include <assert.h>
#include <string.h>   /* memcpy(), memset() */
#include <stdio.h>    /* perror() */
#include <stdlib.h>   /* abort() */

/* ------------------ STATIC FUNCTIONS.  NONE OF THESE ACQUIRE LOCKS
   ON THE MEMORY THAT IS PASSED INTO THEM ------------------------- */

static void __really_do_ralloc(Page * page, recordid rid);

/**

   Move all of the records to the beginning of the page in order to
   increase the available free space.

   The caller of this function must have a writelock on the page.

*/
static void pageCompact(Page * page) {

  int i;
  Page bufPage;
  byte buffer[PAGE_SIZE];

  int numSlots;
  int meta_size;

  bufPage.id = -1;
  bufPage.memAddr = buffer;

  /* Can't compact in place; slot numbers can come in different orders than
     the physical space allocated to them. */

  memset(buffer, -1, PAGE_SIZE);

  /* Number of bytes of metadata (LSN, free space, slot count, and the
     slot entries) stored at the end of the page. */
  meta_size = (page->memAddr + PAGE_SIZE) - (byte*)end_of_usable_space_ptr(page);
  /* *slot_length_ptr(page, (*numslots_ptr(page))-1);*/

  memcpy(buffer + PAGE_SIZE - meta_size, page->memAddr + PAGE_SIZE - meta_size, meta_size);

  pageInitialize(&bufPage);

  numSlots = *numslots_ptr(page);
  for (i = 0; i < numSlots; i++) {
    /* printf("i = %d\n", i); */
    if (isValidSlot(page, i)) {
      /* printf("copying %d\n", i);
         fflush(NULL); */
      /* DEBUG("Buffer offset: %d\n", freeSpace); */
      recordid rid;

      rid.page = -1;
      rid.slot = i;
      rid.size = *slot_length_ptr(page, i);

      __really_do_ralloc(&bufPage, rid);

      memcpy(record_ptr(&bufPage, rid.slot), record_ptr(page, rid.slot), rid.size);

    } else {
      /* Link the dead slot into the freelist of the compacted copy. */
      *slot_ptr(&bufPage, i) = INVALID_SLOT;
      *slot_length_ptr(&bufPage, i) = *freelist_ptr(&bufPage);
      *freelist_ptr(&bufPage) = i;
    }
  }

  /* Rebuild the freelist. */

  /*  *freelist_ptr(&bufPage) = 0;
  for (i = 0; i < numSlots; i++) {
    if (!isValidSlot(&bufPage, i)) {
      *slot_length_ptr(&bufPage, i) = *freelist_ptr(&bufPage);
      *freelist_ptr(&bufPage) = i;
      break;
    }
  }
  */

  memcpy(page->memAddr, buffer, PAGE_SIZE);

}

void pageInitialize(Page * page) {
  /* printf("Initializing page %d\n", page->id);
     fflush(NULL);  */
  memset(page->memAddr, 0, PAGE_SIZE);
  *freespace_ptr(page) = 0;
  *numslots_ptr(page)  = 0;
  *freelist_ptr(page)  = INVALID_SLOT;
}
int unlocked_freespace(Page * page) {
  return (int)((byte*)slot_length_ptr(page, *numslots_ptr(page)) - (page->memAddr + *freespace_ptr(page)));
}

/**
 * freespace() assumes that the page is already loaded in memory.  It
 * takes a Page as a parameter, and returns an estimate of the amount of
 * free space available to a new slot on this page.  (This is the amount
 * of unused space in the page, minus the size of a new slot entry.)
 * This is either exact, or an underestimate.
 *
 * @todo is it ever safe to call freespace without a lock on the page?
 *
 */
int freespace(Page * page) {
  int ret;
  readlock(page->rwlatch, 292);
  ret = unlocked_freespace(page);
  readunlock(page->rwlatch);
  return ret;
}
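
/* Example of the intended (hypothetical) caller pattern: since
   __really_do_ralloc() asserts that the record still fits after
   compaction, callers are expected to check freespace() before calling
   pageRalloc(), and to pick a different page if the record does not fit:

     if(freespace(p) >= size) {
       rid = pageRalloc(p, size);
     } else {
       // ask the page allocator for another page
     }

   Note that freespace() and pageRalloc() each acquire the page latch
   separately, so the check and the allocation are not atomic with
   respect to other threads touching the same page. */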


recordid pageRalloc(Page * page, int size) {

  writelock(page->rwlatch, 342);

  recordid rid;

  rid.page = page->id;
  rid.slot = *numslots_ptr(page);
  rid.size = size;

  /*
     Reuse an old (invalid) slot entry.

     @todo The "old way" below is terribly slow, but something like it
     seems to be necessary, or we will leak slot ids.  Is there a
     better (non n^2) way?

     Perhaps we could use the empty slots to construct a linked
     list of free slots.  (The slot length could be the index
     of the next slot on the list, and we could use the standard
     INVALID_SLOT value to distinguish between the types.)  The
     "new way" below implements exactly that.

  */
  /* Old way */

  /*  int i;
  for (i = 0; i < numSlots; i++) {
    if (!isValidSlot(page, i)) {
      rid.slot = i;
      break;
    }
  } */


  /* new way @todo leaks slot zero (until pageCompact is called) */
  if(*freelist_ptr(page) != INVALID_SLOT) {
    rid.slot = *freelist_ptr(page);
    /* printf("Reusing old slot %d\n", rid.slot); */
    *freelist_ptr(page) = *slot_length_ptr(page, rid.slot);
    *slot_length_ptr(page, rid.slot) = 0;
  } else {
    /* printf("Allocating new slot\n"); */
  }
  /* fflush(NULL); */

  __really_do_ralloc(page, rid);

  /* DEBUG("slot: %d freespace: %d\n", rid.slot, freeSpace); */

  writeunlock(page->rwlatch);

  return rid;
}

static void __really_do_ralloc(Page * page, recordid rid) {

  int freeSpace;

  assert(rid.size > 0);

  if(unlocked_freespace(page) < rid.size) {
    pageCompact(page);

    /* Make sure there's enough free space... */
    assert(unlocked_freespace(page) >= rid.size);
  }

  freeSpace = *freespace_ptr(page);


  if(*numslots_ptr(page) <= rid.slot) {
    /* printf("Incrementing numSlots."); */
    *numslots_ptr(page) = rid.slot + 1;
  }

  DEBUG("Num slots %d\trid.slot %d\n", *numslots_ptr(page), rid.slot);

  *freespace_ptr(page) = freeSpace + rid.size;

  *slot_ptr(page, rid.slot) = freeSpace;
  /* assert(!*slot_length_ptr(page, rid.slot) || (-1 == *slot_length_ptr(page, rid.slot)));*/
  *slot_length_ptr(page, rid.slot) = rid.size;

}

/** Only used for recovery, to make sure that consistent RIDs are
 *  created on log playback. */
recordid pageSlotRalloc(Page * page, lsn_t lsn, recordid rid) {

  writelock(page->rwlatch, 376);

  if(*slot_length_ptr(page, rid.slot) == 0 /*|| *slot_length_ptr(page, rid.slot) == -1*/) {

    __really_do_ralloc(page, rid);

  } else {

    assert((rid.size == *slot_length_ptr(page, rid.slot)) ||
           (*slot_length_ptr(page, rid.slot) >= PAGE_SIZE));

  }

  writeunlock(page->rwlatch);

  return rid;
}
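
/* Illustrative (hypothetical) redo-time usage: when an allocation log
   record is replayed, recovery needs to recreate exactly the rid that
   was originally handed out, rather than letting pageRalloc() pick a
   fresh slot.  Roughly:

     void redoAlloc(Page * p, lsn_t lsn, recordid logged_rid) {
       pageSlotRalloc(p, lsn, logged_rid);   // no-op if the slot already exists
     }

   The slot_length check above makes the call idempotent: replaying the
   same allocation twice only asserts that the sizes still agree. */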


void pageDeRalloc(Page * page, recordid rid) {

  /* Deallocation modifies the slot table and the freelist, so take the
     write latch (see the latching summary at the top of this file). */
  writelock(page->rwlatch, 443);

  *slot_ptr(page, rid.slot) = INVALID_SLOT;
  *slot_length_ptr(page, rid.slot) = *freelist_ptr(page);
  *freelist_ptr(page) = rid.slot;

  unlock(page->rwlatch);
}
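
/* The freed slot is pushed onto a singly-linked free list that is
   threaded through the slot entries themselves: *slot_ptr == INVALID_SLOT
   marks the slot as free, and *slot_length_ptr holds the index of the
   next free slot (the list is terminated by INVALID_SLOT, which is what
   pageInitialize() stores as the initial head).  pageRalloc() pops from
   this list.  A (hypothetical) debugging walk, assuming the caller
   already holds the page latch:

     int s = *freelist_ptr(page);
     while(s != INVALID_SLOT) {
       printf("free slot %d (next = %d)\n", s, *slot_length_ptr(page, s));
       s = *slot_length_ptr(page, s);
     }
*/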

/*
   This should trust the rid (since the caller needs to
   override the size in special circumstances)

   @todo If the rid size has been overridden, we should check to make
   sure that this really is a special record.
*/
void pageReadRecord(int xid, Page * page, recordid rid, byte *buff) {

  int slot_length;
  readlock(page->rwlatch, 519);

  assert(page->id == rid.page);
  slot_length = *slot_length_ptr(page, rid.slot);
  assert((rid.size == slot_length) || (slot_length >= PAGE_SIZE));

  if(!memcpy(buff, record_ptr(page, rid.slot), rid.size)) {
    perror("memcpy");
    abort();
  }

  unlock(page->rwlatch);

}

void pageWriteRecord(int xid, Page * page, lsn_t lsn, recordid rid, const byte *data) {
  int slot_length;

  writelock(page->rwlatch, 529);

  assert(rid.size < PAGE_SIZE);
  assert(page->id == rid.page);

  slot_length = *slot_length_ptr(page, rid.slot);
  assert((rid.size == slot_length) || (slot_length >= PAGE_SIZE));

  if(!memcpy(record_ptr(page, rid.slot), data, rid.size)) {
    perror("memcpy");
    abort();
  }

  page->LSN = lsn;
  /* *lsn_ptr(page) = lsn */
  pageWriteLSN(page);
  unlock(page->rwlatch);

}

/** @todo: Should the caller need to obtain the writelock when calling pageSetSlotType? */
void pageSetSlotType(Page * p, int slot, int type) {
  assert(type > PAGE_SIZE);
  writelock(p->rwlatch, 686);
  *slot_length_ptr(p, slot) = type;
  unlock(p->rwlatch);
}

int pageGetSlotType(Page * p, int slot, int type) {
  int ret;
  readlock(p->rwlatch, 693);
  ret = *slot_length_ptr(p, slot);
  unlock(p->rwlatch);

  return ret > PAGE_SIZE ? ret : NORMAL_SLOT;
}
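
/* The slot length field does double duty as a type tag: values >= PAGE_SIZE
   can never be real in-page record lengths, so pageGetSlotType() reports
   them as the slot's type (e.g. BLOB_SLOT), and anything smaller is
   reported as NORMAL_SLOT.  A (hypothetical) caller might dispatch on it
   like this:

     int type = pageGetSlotType(p, rid.slot, 0);   // third argument is unused
     if(type == NORMAL_SLOT) {
       pageReadRecord(xid, p, rid, buff);           // rid.size is the record length
     } else if(type == BLOB_SLOT) {
       // the caller overrides rid.size (e.g. to the size of the on-page
       // blob record) before reading; see pageReadRecord()'s comment
       // about trusting the rid.
     }
*/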


/*
typedef struct {
  int page;
  int slot;
  / ** If pageptr is not null, then it is used by the iterator methods.
       Otherwise, they re-load the pages and obtain short latches for
       each call. * /
  Page * pageptr;
} page_iterator_t;



void pageIteratorInit(recordid rid, page_iterator_t * pit, Page * p) {
  pit->page = rid.page;
  pit->slot = rid.slot;
  pit->pageptr = p;
  assert((!p) || (p->id == pit->page));
}

int nextSlot(page_iterator_t * pit, recordid * rid) {
  Page * p;
  int numSlots;
  int done = 0;
  int ret;
  if(pit->pageptr) {
    p = pit->pageptr;
  } else {
    p = loadPage(pit->page);
  }

  numSlots = readNumSlots(p->memAddr);
  while(pit->slot < numSlots && !done) {

    if(isValidSlot(p->memAddr, pit->slot)) {
      done = 1;
    } else {
      pit->slot ++;
    }

  }
  if(!done) {
    ret = 0;
  } else {
    ret = 1;
    rid->page = pit->page;
    rid->slot = pit->slot;
    rid->size = getSlotLength(p->memAddr, rid->slot);
    if(rid->size >= PAGE_SIZE) {

      if(rid->size == BLOB_SLOT) {
        blob_record_t br;
        pageReadRecord(-1, p, *rid, (byte*)&br);
        rid->size = br.size;
      }
    }
  }

  if(!pit->pageptr) {
    releasePage(p);
  }

  return ret;

}
*/