Implemented a freespace manager that should safely allocate space, even in the face of crashes, and that can reclaim unused space (unless an application opens more than one simultaneous transaction that performs allocations).

Fixed some blob bugs (by adding extra fdatasync() calls).

Began factoring out the page management code so that it is an extension of, rather than an integral part of, lladd.
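For reference, a minimal sketch of how the reworked page allocation API fits together (assuming the usual Tinit()/Tbegin()/Tcommit() lifecycle; see check_pageOperations.c below for the real usage):

    #include <string.h>
    #include <lladd/transactional.h>
    #include <lladd/operations/pageOperations.h>

    byte buf[PAGE_SIZE];
    Tinit();                       /* now calls pageOperationsInit() */
    int xid    = Tbegin();
    int pageid = TpageAlloc(xid);  /* the type argument is gone      */
    memset(buf, 42, PAGE_SIZE);
    TpageSet(xid, pageid, buf);    /* now takes a byte*, not a Page* */
    TpageGet(xid, pageid, buf);    /* new: read back a whole page    */
    TpageDealloc(xid, pageid);     /* space reused only w/ REUSE_PAGES */
    Tcommit(xid);
    Tdeinit();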
Sears Russell 2004-08-21 00:03:30 +00:00
parent b4d7883f66
commit 811bc5c710
19 changed files with 790 additions and 154 deletions

View file

@@ -96,18 +96,27 @@ terms specified in this license.
#define OPERATION_LHINSERT 5
#define OPERATION_LHREMOVE 6
#define OPERATION_DEALLOC 7
#define OPERATION_PAGE_ALLOC 8
#define OPERATION_PAGE_DEALLOC 9
/*#define OPERATION_PAGE_ALLOC 8
#define OPERATION_PAGE_DEALLOC 9 */
#define OPERATION_PAGE_SET 10
#define OPERATION_UPDATE_FREESPACE 11
#define OPERATION_UPDATE_FREESPACE_INVERSE 12
#define OPERATION_UPDATE_FREELIST 13
#define OPERATION_UPDATE_FREELIST_INVERSE 14
#define OPERATION_FREE_PAGE 15
#define OPERATION_ALLOC_FREED 16
#define OPERATION_UNALLOC_FREED 17
/* The operation numbers above must be less than MAX_OPERATIONS, below. */
#define MAX_OPERATIONS 20
/** @todo undocumented */
/** This constant is used as a placeholder to mark slot locations that are invalid.
@see slotted.c, indirect.c
*/
#define INVALID_SLOT PAGE_SIZE
#define NORMAL_SLOT (PAGE_SIZE + 1)
#define BLOB_SLOT (PAGE_SIZE + 2)
/* #define NORMAL_SLOT (PAGE_SIZE + 1)
#define BLOB_SLOT (PAGE_SIZE + 2)*/
/** @deprecated Replace all occurrences with sizeof(blob_record_t) */
#define BLOB_REC_SIZE sizeof(blob_record_t) /*12*/

View file

@@ -61,11 +61,26 @@ terms specified in this license.
#include <lladd/operations.h>
int TpageAlloc(int xid, int type);
int TpageAllocMany(int xid, int count, int type);
/** If defined, then pageOperations.h will reuse freed pages.
Unfortunately, the current support is not safe for programs with
multiple concurrent transactions. */
/*#define REUSE_PAGES */
int TpageAlloc(int xid/*, int type*/);
int TpageAllocMany(int xid, int count/*, int type*/);
int TpageDealloc(int xid, int pageid);
int TpageSet(int xid, int pageid, Page* dat);
Operation getPageAlloc();
Operation getPageDealloc();
int TpageSet(int xid, int pageid, byte* dat);
int TpageGet(int xid, int pageid, byte* buf);
/*Operation getPageAlloc();
Operation getPageDealloc(); */
Operation getPageSet();
Operation getUpdateFreespace();
Operation getUpdateFreespaceInverse();
Operation getUpdateFreelist();
Operation getUpdateFreelistInverse();
Operation getFreePageOperation();
Operation getAllocFreedPage();
Operation getUnallocFreedPage();
void pageOperationsInit();
#endif

View file

@@ -3,6 +3,6 @@
lib_LIBRARIES=liblladd.a
#liblladd_a_LIBADD=logger/liblogger.a operations/liboperations.a
# removed: recovery.c transactional.c logger.c logger/logparser.c logger/logstreamer.c
liblladd_a_SOURCES=common.c stats.c io.c bufferManager.c linkedlist.c operations.c pageFile.c pageCache.c page.c blobManager.c recovery2.c transactional2.c logger/logEntry.c logger/logWriter.c logger/logHandle.c logger/logger2.c operations/pageOperations.c page/indirect.c operations/decrement.c operations/increment.c operations/prepare.c operations/set.c operations/alloc.c page/slotted.c operations/lladdhash.c
liblladd_a_SOURCES=common.c stats.c io.c bufferManager.c linkedlist.c operations.c pageFile.c pageCache.c page.c blobManager.c recovery2.c transactional2.c logger/logEntry.c logger/logWriter.c logger/logHandle.c logger/logger2.c operations/pageOperations.c page/indirect.c operations/decrement.c operations/increment.c operations/prepare.c operations/set.c operations/alloc.c page/slotted.c operations/lladdhash.c page/header.c
AM_CFLAGS= -g -Wall -pedantic -std=gnu99

View file

@@ -14,7 +14,6 @@
#include "io.h"
#include <pbl/pbl.h>
#include "page.h"
#include "page/slotted.h"
#include <stdio.h>
pthread_mutex_t blob_hash_mutex;
@@ -40,7 +39,9 @@ static void writeRawRecord(int xid, Page * p, recordid rid, lsn_t lsn, const voi
recordid blob_rec_rid = rid;
blob_rec_rid.size = size;
writeRecord(xid, p, lsn, blob_rec_rid, buf);
/** Tset(xid, blob_rec_rid, buf); @todo how should we write the log entry? */
/* Tset(xid, blob_rec_rid, buf); - We no longer need to write a log
record out here, since we are called as the result of processing a
log record. */
}
static lsn_t * tripleHashLookup(int xid, recordid rid) {
lsn_t * ret;
@@ -231,13 +232,9 @@ void allocBlob(int xid, Page * p, lsn_t lsn, recordid rid) {
fileSize = myFseek(blobf1, 0, SEEK_END);
blob_rec.offset = fileSize;
slottedSetType(p, rid.slot, BLOB_SLOT);
rid.size = BLOB_SLOT;
/* setRecordType(p, rid, BLOB_SLOT); */
/* rid.size = BLOB_SLOT; */
/* Tset() needs to know to 'do the right thing' here, since we've
changed the size it has recorded for this record, and
writeRawRecord makes sure that that is the case. */
writeRawRecord (xid, p, rid, lsn, &blob_rec, sizeof(blob_record_t));
/* releasePage(p); */
rid.size = blob_rec.size;
@@ -258,9 +255,22 @@ void allocBlob(int xid, Page * p, lsn_t lsn, recordid rid) {
if(1 != fwrite(&zero, sizeof(char), 1, blobf0)) { perror(NULL); abort(); }
if(1 != fwrite(&zero, sizeof(char), 1, blobf1)) { perror(NULL); abort(); }
fdatasync(fileno(blobf0));
fdatasync(fileno(blobf1));
funlockfile(blobf0);
funlockfile(blobf1);
/* Tset() needs to know to 'do the right thing' here, since we've
changed the size it has recorded for this record, and
writeRawRecord makes sure that that is the case.
(This call must be after the files have been extended, and synced to disk, since it marks completion of the blob allocation.)
*/
writeRawRecord (xid, p, rid, lsn, &blob_rec, sizeof(blob_record_t));
}
void readBlob(int xid, Page * p, recordid rid, void * buf) {
@@ -332,7 +342,11 @@ static FILE * getDirtyFD(int xid, Page * p, lsn_t lsn, recordid rid) {
return fd;
}
/* This function cannot be safely implemented on top of the current
blob implementation since at recovery, we have no way of knowing
whether or not a future write to the blob was performed. (This is
the same reason why we cannot steal pages whose LSNs may be too
low.)
void setRangeBlob(int xid, Page * p, lsn_t lsn, recordid rid, const void * buf, long offset, long length) {
FILE * fd;
int readcount;
@@ -346,11 +360,22 @@ void setRangeBlob(int xid, Page * p, lsn_t lsn, recordid rid, const void * buf,
assert(offset == rec.offset);
readcount = fwrite(buf, length, 1, fd);
assert(1 == readcount);
fdatasync(fileno(fd));
funlockfile(fd);
}
} */
/** @todo dirtyBlobs should contain the highest LSN that wrote to the
current version of the dirty blob, and the lsn field should be
checked to be sure that it increases monotonically. */
checked to be sure that it increases monotonically.
@todo Correctness / performance problem: Currently, we cannot
manually pin pages in memory, so the record pointing to the blob
may be stolen. Therefore, we must fdatasync() the blob file's
updates to disk each time writeBlob is called.
If we could pin the page, this problem would be solved, and
writeBlob would not have to call fdatasync(). The same problem
applies to setRangeBlob.
*/
void writeBlob(int xid, Page * p, lsn_t lsn, recordid rid, const void * buf) {
long offset;
@@ -373,18 +398,23 @@ void writeBlob(int xid, Page * p, lsn_t lsn, recordid rid, const void * buf) {
readcount = fwrite(buf, rec.size, 1, fd);
assert(1 == readcount);
fdatasync(fileno(fd));
funlockfile(fd);
/* No need to update the raw blob record. */
}
/** @todo check to see if commitBlobs actually needs to flush blob
files when it's called (are there any dirty blobs associated with
this transaction? */
this transaction?)
@todo when writeBlob is fixed, add the fdatasync calls back into commitBlobs().
*/
void commitBlobs(int xid) {
flockfile(blobf0);
flockfile(blobf1);
fdatasync(fileno(blobf0));
fdatasync(fileno(blobf1));
/* fdatasync(fileno(blobf0));
fdatasync(fileno(blobf1)); */
funlockfile(blobf0);
funlockfile(blobf1);
abortBlobs(xid);

View file

@@ -38,7 +38,8 @@ static int operate(int xid, Page * p, lsn_t lsn, recordid rid, const void * dat)
/* if(*page_type_ptr(p) == UNINITIALIZED_PAGE) {
*page_type_ptr(p) = SLOTTED_PAGE;
} */
assert(*page_type_ptr(p) == SLOTTED_PAGE);
/* assert(*page_type_ptr(p) == SLOTTED_PAGE); */
if(rid.size >= BLOB_THRESHOLD_SIZE) {
allocBlob(xid, p, lsn, rid);

View file

@@ -1,12 +1,26 @@
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include "../page.h"
#include <lladd/operations/pageOperations.h>
#include <assert.h>
#include "../page/slotted.h"
int __pageAlloc(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
/*#include "../page/slotted.h"*/
#include "../page/header.h"
#include "../pageFile.h"
static int freelist;
static int freepage;
static pthread_mutex_t pageAllocMutex;
/*int __pageAlloc(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
int type = *(int*)d;
*page_type_ptr(p) = type;
/** @todo this sort of thing should be done in a centralized way. */
/ ** @todo this sort of thing should be done in a centralized way. * /
if(type == SLOTTED_PAGE) {
slottedPageInitialize(p);
}
@@ -19,63 +33,294 @@ int __pageDealloc(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
*page_type_ptr(p) = UNINITIALIZED_PAGE;
return 0;
}
*/
int __pageSet(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
memcpy(p->memAddr, d, PAGE_SIZE);
pageWriteLSN(p, lsn);
return 0;
}
int TpageSet(int xid, int pageid, Page* p) {
typedef struct {
int before;
int after;
} update_tuple;
int __update_freespace(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
assert(r.page == 0);
const update_tuple * t = d;
/* printf("freespace %d -> %d\n", t->before, t->after);
fflush(NULL); */
* headerFreepage_ptr(p) = t->after;
freepage = t->after;
pageWriteLSN(p, lsn);
return 0;
}
int __update_freespace_inverse(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
#ifdef REUSE_PAGES
assert(r.page == 0);
const update_tuple * t = d;
/* printf("freespace %d <- %d\n", t->before, t->after);
fflush(NULL); */
* headerFreepage_ptr(p) = t->before;
freepage = t->before;
#endif
pageWriteLSN(p, lsn);
return 0;
}
/** @todo need to hold mutex here... */
int __update_freelist(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
assert(r.page == 0);
const update_tuple * t = d;
/* printf("freelist %d -> %d\n", t->before, t->after);
fflush(NULL); */
* headerFreepagelist_ptr(p) = t->after;
freelist = t->after;
pageWriteLSN(p, lsn);
return 0;
}
int __update_freelist_inverse(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
assert(r.page == 0);
const update_tuple * t = d;
/* printf("freelist %d <- %d\n", t->before, t->after);
fflush(NULL); */
* headerFreepagelist_ptr(p) = t->before;
freelist = t->before;
pageWriteLSN(p, lsn);
return 0;
}
int __free_page(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
const int * successor = d;
/* printf("Unallocing page %d\n", r.page);
fflush(NULL); */
memset(p->memAddr, 0, PAGE_SIZE);
*page_type_ptr(p) = LLADD_FREE_PAGE;
*nextfreepage_ptr(p) = *successor;
pageWriteLSN(p, lsn);
return 0;
}
int __alloc_freed(int xid, Page * p, lsn_t lsn, recordid r, const void * d) {
memset(p->memAddr, 0, PAGE_SIZE);
pageWriteLSN(p, lsn);
return 0;
}
int TpageGet(int xid, int pageid, byte *memAddr) {
Page * q = loadPage(pageid);
memcpy(memAddr, q->memAddr, PAGE_SIZE);
releasePage(q);
return 0;
}
int TpageSet(int xid, int pageid, byte * memAddr) {
recordid rid;
rid.page = pageid;
rid.slot = 0;
rid.size = 0;
Tupdate(xid,rid,p->memAddr, OPERATION_PAGE_SET);
Tupdate(xid,rid,memAddr, OPERATION_PAGE_SET);
return 0;
}
/** @todo Need to re-think TpageDealloc/TpageAlloc's logging
strategies when we implement page re-use. Currently, TpageDealloc can
use logical logging. Perhaps TpageDealloc should use physical
logging, and wipe the page to zero, while TpageAlloc should continue to
use logical logging. (Have we ever had operations whose inverses
took different types of log entries? Do such operations work?) */
/** This needs to be called immediately after the storefile is opened,
since it needs to perform raw, synchronous I/O on the pagefile for
bootstrapping purposes. */
void pageOperationsInit() {
Page p;
p.rwlatch = initlock();
p.loadlatch = initlock();
assert(!posix_memalign((void **)&(p.memAddr), PAGE_SIZE, PAGE_SIZE));
p.id = 0;
pageRead(&p);
if(*page_type_ptr(&p) != LLADD_HEADER_PAGE) {
headerPageInitialize(&p);
pageWrite(&p);
}
freelist = *headerFreepagelist_ptr(&p);
freepage = *headerFreepage_ptr(&p);
assert(freepage);
free(p.memAddr);
deletelock(p.loadlatch);
deletelock(p.rwlatch);
pthread_mutex_init(&pageAllocMutex, NULL);
}
/** @todo TpageAlloc / TpageDealloc + undo is not multi-transaction / threadsafe.
Example of the problem:
T1 T2
dealloc(100)
(a) list ptr 30 -> 100
(b) p(100) nil -> 30
alloc() -> 100 <- Can't allow this to happen!
list_ptr 100 -> 30
alloc() -> 30
list_ptr 30 -> 20
abort();
// Really just needs to remove 100 from the linked list. Instead,
we use physical, value based locking.
list ptr 20 <- 30 <- Oops! Page 30 is in use, and we lose the rest
of the freelist, starting at 20!
The partial solution: dealloc() acquires a lock on the freelist until
commit / abort. If other transactions need to allocate while the
lock is held, then they simply do not reuse pages. Since locking is
not yet implemented, we require applications to manually serialize
transactions that call Talloc() or Tdealloc().
A better solution: defer the addition of 100 to the freelist until
commit, and use a 'real' data structure, like a concurrent B-Tree.
*/
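/* Until that locking exists, the serialization can be imposed by the
   application itself; a sketch (alloc_mutex here is hypothetical, not
   part of lladd):

       static pthread_mutex_t alloc_mutex = PTHREAD_MUTEX_INITIALIZER;

       pthread_mutex_lock(&alloc_mutex);
       int xid = Tbegin();
       TpageDealloc(xid, pageid);
       Tcommit(xid);   // hold the lock across commit / abort
       pthread_mutex_unlock(&alloc_mutex);
*/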
int TpageDealloc(int xid, int pageid) {
recordid rid;
#ifdef REUSE_PAGES
update_tuple t;
pthread_mutex_lock(&pageAllocMutex);
#endif
rid.page = pageid;
rid.slot = 0;
rid.size = 0;
Page * p = loadPage(pageid);
int type = *page_type_ptr(p);
releasePage(p);
#ifdef REUSE_PAGES
assert(freelist != pageid);
t.before = freelist;
#endif
Tupdate(xid, rid, &freelist, OPERATION_FREE_PAGE);
#ifdef REUSE_PAGES
t.after = pageid;
freelist = pageid;
rid.page = 0;
Tupdate(xid, rid, &t, OPERATION_UPDATE_FREELIST);
/* OLD STUFF: Page * p = loadPage(pageid); int type = *page_type_ptr(p); releasePage(p); Tupdate(xid, rid, &type, OPERATION_PAGE_DEALLOC); */
pthread_mutex_unlock(&pageAllocMutex);
#endif
Tupdate(xid, rid, &type, OPERATION_PAGE_DEALLOC);
return 0;
}
int TpageAlloc(int xid, int type) {
int TpageAlloc(int xid /*, int type */) {
recordid rid;
int pageid = pageAlloc();
rid.page = pageid;
update_tuple t;
rid.slot = 0;
rid.size = 0;
Tupdate(xid, rid, &type, OPERATION_PAGE_ALLOC);
return pageid;
pthread_mutex_lock(&pageAllocMutex);
int newpage;
#ifdef REUSE_PAGES
if(freelist) {
printf("Re-using old page: %d\n", freelist);
fflush(NULL);
newpage = freelist;
Page * p = loadPage(newpage); /* Could obtain write lock here,
but this is the only function
that should ever touch pages of
type LLADD_FREE_PAGE, and we
already hold a mutex... */
assert(*page_type_ptr(p) == LLADD_FREE_PAGE);
t.before = freelist;
freelist = *nextfreepage_ptr(p);
t.after = freelist;
assert(newpage != freelist);
releasePage(p);
rid.page = newpage;
Tupdate(xid, rid, &freelist, OPERATION_ALLOC_FREED);
rid.page = 0;
Tupdate(xid, rid, &t, OPERATION_UPDATE_FREELIST);
rid.page = newpage;
} else {
#endif
/* printf("Allocing new page: %d\n", freepage);
fflush(NULL); */
t.before = freepage;
newpage = freepage;
freepage++;
t.after = freepage;
/* Don't need to touch the new page. */
rid.page = 0;
Tupdate(xid, rid, &t, OPERATION_UPDATE_FREESPACE);
rid.page = newpage;
#ifdef REUSE_PAGES
}
#endif
pthread_mutex_unlock(&pageAllocMutex);
return newpage;
}
/** Allocs an extent of pages. @todo CONCURRENCY BUG: TpageAllocMany
    cannot be made concurrent until ralloc uses TpageAlloc to allocate new
    records. (Also, concurrency for TpageAllocMany hasn't been
    implemented yet.)
*/
int TpageAllocMany(int xid, int count, int type) {
int firstPage = -1;
int lastPage = -1;
for(int i = 0 ; i < count; i++) {
int TpageAllocMany(int xid, int count /*, int type*/) {
/* int firstPage = -1;
int lastPage = -1; */
recordid rid;
rid.slot = 0;
rid.size = 0;
update_tuple t;
pthread_mutex_lock(&pageAllocMutex);
t.before = freepage;
int newpage = freepage;
freepage += count;
t.after = freepage;
/* Don't need to touch the new pages. */
rid.page = 0;
Tupdate(xid, rid, &t, OPERATION_UPDATE_FREESPACE);
rid.page = newpage;
/* for(int i = 0 ; i < count; i++) {
int thisPage = TpageAlloc(xid, type);
if(lastPage == -1) {
firstPage = lastPage = thisPage;
@@ -83,11 +328,115 @@ int TpageAllocMany(int xid, int count, int type) {
assert((lastPage +1) == thisPage);
lastPage = thisPage;
}
}
return firstPage;
} */
pthread_mutex_unlock(&pageAllocMutex);
return newpage;
}
Operation getPageAlloc() {
/** Safely allocating and freeing pages is surprisingly complex. Here is a summary of the process:
Alloc:
obtain mutex
choose a free page using in-memory data
load page to be used, and update in-memory data. (obtains lock on loaded page)
Tupdate() the page, zeroing it, and saving the old successor in the log.
release the page (avoids deadlock in the next step)
Tupdate() LLADD's header page (the first in the store file) with a new copy of
the in-memory data, saving old version in the log.
release mutex
Free:
obtain mutex
determine the current head of the freelist using in-memory data
Tupdate() the page, initializing it to be a freepage, and physically logging the old version
release the page
Tupdate() LLADD's header page with a new copy of the in-memory data, saving old version in the log
release mutex
*/
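/* In the code above, TpageAlloc() / TpageDealloc() drive this protocol,
   and the operations below implement its logged steps:
   OPERATION_ALLOC_FREED and OPERATION_FREE_PAGE update the page itself,
   while OPERATION_UPDATE_FREELIST and OPERATION_UPDATE_FREESPACE persist
   the in-memory freelist head and free-page counter to the header page
   (with _INVERSE variants for undo). */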
Operation getUpdateFreespace() {
Operation o = {
OPERATION_UPDATE_FREESPACE,
sizeof(update_tuple),
OPERATION_UPDATE_FREESPACE_INVERSE,
&__update_freespace
};
return o;
}
Operation getUpdateFreespaceInverse() {
Operation o = {
OPERATION_UPDATE_FREESPACE_INVERSE,
sizeof(update_tuple),
OPERATION_UPDATE_FREESPACE,
&__update_freespace_inverse
};
return o;
}
Operation getUpdateFreelist() {
Operation o = {
OPERATION_UPDATE_FREELIST,
sizeof(update_tuple),
OPERATION_UPDATE_FREELIST_INVERSE,
&__update_freelist
};
return o;
}
Operation getUpdateFreelistInverse() {
Operation o = {
OPERATION_UPDATE_FREELIST_INVERSE,
sizeof(update_tuple),
OPERATION_UPDATE_FREELIST,
&__update_freelist_inverse
};
return o;
}
/** frees a page by zeroing it, setting its type to LLADD_FREE_PAGE,
and setting the successor pointer. This operation physically logs
a whole page, which makes it expensive. Doing so is necessary in
general, but it is possible that application specific logic could
avoid the physical logging here. */
Operation getFreePageOperation() {
Operation o = {
OPERATION_FREE_PAGE,
sizeof(int),
NO_INVERSE_WHOLE_PAGE,
&__free_page
};
return o;
}
/** allocs a page that was once freed by zeroing it. */
Operation getAllocFreedPage() {
Operation o = {
OPERATION_ALLOC_FREED,
sizeof(int),
OPERATION_UNALLOC_FREED,
&__alloc_freed
};
return o;
}
/** does the same thing as getFreePageOperation, but doesn't log a preimage. (Used to undo an alloc of a freed page.) */
Operation getUnallocFreedPage() {
Operation o = {
OPERATION_UNALLOC_FREED,
sizeof(int),
OPERATION_ALLOC_FREED,
&__free_page
};
return o;
}
/*Operation getPageAlloc() {
Operation o = {
OPERATION_PAGE_ALLOC,
sizeof(int),
@@ -105,7 +454,7 @@ Operation getPageDealloc() {
&__pageDealloc
};
return o;
}
}*/
Operation getPageSet() {
Operation o = {

View file

@@ -151,7 +151,6 @@ void pageInit() {
pthread_mutex_init(&lastAllocedPage_mutex , NULL);
lastAllocedPage = 0;
slottedPageInit();
}
@@ -171,7 +170,7 @@ void pageCommit(int xid) {
void pageAbort(int xid) {
}
int pageAllocUnlocked() {
static int pageAllocUnlocked() {
int ret = lastAllocedPage;
Page * p;
@@ -197,12 +196,12 @@ int pageAllocUnlocked() {
slot of the first page in the storefile for metadata, and to keep
lastFreepage there, instead of in RAM.
*/
int pageAlloc() {
/*int pageAlloc() {
pthread_mutex_lock(&lastAllocedPage_mutex);
int ret = pageAllocUnlocked();
pthread_mutex_unlock(&lastAllocedPage_mutex);
return ret;
}
}*/
@@ -237,6 +236,12 @@ Page *pageMalloc() {
return page;
}
/*void setRecordType(Page * page, recordid rid, int slot_type) {
if(*page_type_ptr(page) == SLOTTED_PAGE) {
slottedSetType(page, rid.slot, slot_type);
}
}*/
void writeRecord(int xid, Page * p, lsn_t lsn, recordid rid, const void *dat) {
assert( (p->id == rid.page) && (p->memAddr != NULL) );

View file

@@ -96,6 +96,8 @@ BEGIN_C_DECLS
#define UNINITIALIZED_PAGE 0
#define SLOTTED_PAGE 1
#define INDIRECT_PAGE 2
#define LLADD_HEADER_PAGE 3
#define LLADD_FREE_PAGE 4
#define lsn_ptr(page) (((lsn_t *)(&((page)->memAddr[PAGE_SIZE])))-1)
#define page_type_ptr(page) (((int*)lsn_ptr((page)))-1)
@@ -104,6 +106,7 @@ BEGIN_C_DECLS
#define shorts_from_end(page, count) (((short*)end_of_usable_space_ptr((page)))-(count))
#define bytes_from_start(page, count) (((byte*)((page)->memAddr))+(count))
#define ints_from_start(page, count) (((int*)((page)->memAddr))+(count))
#define ints_from_end(page, count) (((int*)end_of_usable_space_ptr((page)))-(count))
#define USABLE_SIZE_OF_PAGE (PAGE_SIZE - sizeof(lsn_t) - sizeof(int))
@@ -219,6 +222,24 @@ void pageWriteLSN(Page * page, lsn_t lsn);
*/
lsn_t pageReadLSN(const Page * page);
/**
Sets the record type, if applicable. Right now, this is only
really meaningful in the case of slotted pages that store
information about blobs, but the intention of this function is to
allow a level of indirection so that the blob implementation and
slotted page implementation are independent of each other.
The record type is meant to be a hint to the page implementation,
so no getRecordType function is provided. (If the type of record
does not matter to the page implementation, then it is free to
ignore this call.)
@param page A pointer to the page containing the record of interest.
@param rid The record's id.
@param slot_type The new type of the record. (Must be > PAGE_SIZE).
*/
/*void setRecordType(Page * page, recordid rid, int slot_type); */
/**
* @param xid transaction id @param lsn the lsn that the updated
* record will reflect. This is needed by recovery, and undo. (The
@@ -248,7 +269,7 @@ void pageRealloc(Page * p, int id);
@return the pageid of the newly allocated page, which is the
offset of the page in the file, divided by the page size.
*/
int pageAlloc() ;
/*int pageAlloc() ;*/
END_C_DECLS

src/lladd/page/header.c (new file, 27 lines)
View file

@@ -0,0 +1,27 @@
#include "../page.h"
#include "header.h"
void headerPageInitialize(Page * page) {
memset(page->memAddr, 0, PAGE_SIZE);
*page_type_ptr(page) = LLADD_HEADER_PAGE;
*headerFreepage_ptr(page) = 1;
*headerFreepagelist_ptr(page) = 0;
}
void freePage(Page * freepage, long freepage_id, Page * headerpage) {
memset(freepage->memAddr, 0, PAGE_SIZE);
*page_type_ptr(freepage) = LLADD_FREE_PAGE;
*nextfreepage_ptr(freepage) = *headerFreepagelist_ptr(headerpage);
*headerFreepagelist_ptr(headerpage) = freepage_id;
}
/**
@param freepage Must be the head of the freepage list (right now,
the free list is essentially treated like a stack).
*/
void unfreePage(Page * freepage, Page * headerpage) {
*headerFreepagelist_ptr(headerpage) = *nextfreepage_ptr(freepage);
memset(freepage->memAddr, 0, PAGE_SIZE);
}
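/* Stack discipline example (sketch): after freePage(p, 7, h) and then
   freePage(q, 9, h), the header's list reads 9 -> 7 -> old head;
   unfreePage(q, h) pops the head, leaving 7 -> old head. */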

src/lladd/page/header.h (new file, 11 lines)
View file

@@ -0,0 +1,11 @@
void headerPageInitialize(Page * p);
void freePage(Page * freepage, long freepage_id, Page * headerpage);
void unfreePage(Page * freepage, Page * headerpage);
#define headerFreepage_ptr(page) ints_from_end((page), 1)
#define headerFreepagelist_ptr(page) ints_from_end((page), 2)
/** @todo allow for efficient freeing of blocks of multiple pages, and
implement worst-fit page reclamation (as a special case, use
best-fit for single page allocations.) */
#define nextfreepage_ptr(page) ints_from_end((page), 1)
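/* Layout note: on the header page, the last two ints of usable space
   hold the first-unallocated-page counter and the freelist head; on a
   LLADD_FREE_PAGE, the same end-of-page int holds the successor pointer
   instead. The overlap is safe because a page has exactly one type. */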

View file

@@ -62,7 +62,7 @@ recordid __rallocMany(int xid, int parentPage, int recordSize, int recordCount);
have to physically log pre- and post-images of the allocated space?
*/
recordid rallocMany(int xid, int recordSize, int recordCount) {
int page = TpageAlloc(xid, SLOTTED_PAGE);
int page = TpageAlloc(xid/*, SLOTTED_PAGE*/);
return __rallocMany(xid, page, recordSize, recordCount);
}
@@ -107,7 +107,7 @@ recordid __rallocMany(int xid, int parentPage, int recordSize, int recordCount)
int newPageCount = (int)ceil((double)recordCount / (double)next_level_records_per_page);
int firstChildPage = TpageAllocMany(xid, newPageCount, SLOTTED_PAGE);/*pageAllocMultiple(newPageCount); */
int firstChildPage = TpageAllocMany(xid, newPageCount/*, SLOTTED_PAGE*/);/*pageAllocMultiple(newPageCount); */
int tmpRecordCount = recordCount;
int thisChildPage = firstChildPage;
@@ -156,7 +156,7 @@ recordid __rallocMany(int xid, int parentPage, int recordSize, int recordCount)
}
TpageSet(xid, parentPage, &p);
TpageSet(xid, parentPage, p.memAddr);
rid.page = parentPage;
rid.slot = RECORD_ARRAY;

View file

@@ -2,7 +2,7 @@
#include "../page.h"
#include "../blobManager.h"
/*#include "../blobManager.h" */
#include "slotted.h"
#include <assert.h>
@@ -168,16 +168,18 @@ recordid slottedPreRalloc(int xid, long size) {
pthread_mutex_lock(&lastFreepage_mutex);
/** @todo is ((unsigned int) foo) == -1 portable? Gotta love C.*/
if(lastFreepage == -1) {
lastFreepage = TpageAlloc(xid, SLOTTED_PAGE);
lastFreepage = TpageAlloc(xid/*, SLOTTED_PAGE*/);
p = loadPage(lastFreepage);
slottedPageInitialize(p);
} else {
p = loadPage(lastFreepage);
}
if(slottedFreespace(p) < size ) {
releasePage(p);
lastFreepage = TpageAlloc(xid, SLOTTED_PAGE);
lastFreepage = TpageAlloc(xid/*, SLOTTED_PAGE*/);
p = loadPage(lastFreepage);
slottedPageInitialize(p);
}
ret = slottedRawRalloc(p, size);
@@ -254,6 +256,33 @@ recordid slottedPostRalloc(Page * page, lsn_t lsn, recordid rid) {
writelock(page->rwlatch, 376);
if(*page_type_ptr(page) != SLOTTED_PAGE) {
/* slottedPreRalloc calls this when necessary. However, in
the case of a crash, it is possible that
slottedPreRalloc's updates were lost, so we need to check
for that here.
If slottedPreRalloc didn't call slottedPageInitialize,
then there would be a race condition:
Thread 1 Thread 2
preAlloc(big record)
preAlloc(big record) // Should check the freespace of the page and fail
postAlloc(big record)
postAlloc(big record) // Thread 2 stole my space! => Crash?
Note that this _will_ cause trouble if recovery is
multi-threaded, and allows the application to begin
updating the storefile without first locking any pages
that suffer from this problem.
*/
slottedPageInitialize(page);
}
if(*slot_length_ptr(page, rid.slot) == 0 /*|| *slot_length_ptr(page, rid.slot) == -1*/) {
__really_do_ralloc(page, rid);
@@ -333,7 +362,7 @@ void slottedWrite(int xid, Page * page, lsn_t lsn, recordid rid, const byte *dat
}
void slottedSetType(Page * p, int slot, int type) {
/*void slottedSetType(Page * p, int slot, int type) {
assert(type > PAGE_SIZE);
writelock(p->rwlatch, 686);
*slot_length_ptr(p, slot) = type;
@@ -346,74 +375,6 @@ int slottedGetType(Page * p, int slot) {
ret = *slot_length_ptr(p, slot);
unlock(p->rwlatch);
/* getSlotType does the locking for us. */
/ * getSlotType does the locking for us. * /
return ret > PAGE_SIZE ? ret : NORMAL_SLOT;
}
/*
typedef struct {
int page;
int slot;
/ ** If pageptr is not null, then it is used by the iterator methods.
Otherwise, they re-load the pages and obtain short latches for
each call. * /
Page * pageptr;
} page_iterator_t;
void pageIteratorInit(recordid rid, page_iterator_t * pit, Page * p) {
pit->page = rid.page;
pit->slot = rid.slot;
pit->pageptr = p;
assert((!p) || (p->id == pit->page));
}
int nextSlot(page_iterator_t * pit, recordid * rid) {
Page * p;
int numSlots;
int done = 0;
int ret;
if(pit->pageptr) {
p = pit->pageptr;
} else {
p = loadPage(pit->page);
}
numSlots = readNumSlots(p->memAddr);
while(pit->slot < numSlots && !done) {
if(isValidSlot(p->memAddr, pit->slot)) {
done = 1;
} else {
pit->slot ++;
}
}
if(!done) {
ret = 0;
} else {
ret = 1;
rid->page = pit->page;
rid->slot = pit->slot;
rid->size = getSlotLength(p->memAddr, rid->slot);
if(rid->size >= PAGE_SIZE) {
if(rid->size == BLOB_SLOT) {
blob_record_t br;
pageReadRecord(-1, p, *rid, (byte*)&br);
rid->size = br.size;
}
}
}
if(!pit->pageptr) {
releasePage(p);
}
return ret;
}
*/
}*/

View file

@@ -19,6 +19,9 @@
#include <fcntl.h>
#include <unistd.h>
/** Allows bootstrapping of the header page. */
#include <lladd/operations/pageOperations.h>
static int stable = -1;
static pthread_mutex_t stable_mutex;

View file

@@ -41,10 +41,19 @@ void setupOperationsTable() {
/* operationsTable[OPERATION_LHINSERT] = getLHInsert();
operationsTable[OPERATION_LHREMOVE] = getLHRemove(); */
operationsTable[OPERATION_DEALLOC] = getDealloc();
operationsTable[OPERATION_PAGE_ALLOC] = getPageAlloc();
operationsTable[OPERATION_PAGE_DEALLOC] = getPageDealloc();
/* operationsTable[OPERATION_PAGE_ALLOC] = getPageAlloc();
operationsTable[OPERATION_PAGE_DEALLOC] = getPageDealloc(); */
operationsTable[OPERATION_PAGE_SET] = getPageSet();
operationsTable[OPERATION_UPDATE_FREESPACE] = getUpdateFreespace();
operationsTable[OPERATION_UPDATE_FREESPACE_INVERSE] = getUpdateFreespaceInverse();
operationsTable[OPERATION_UPDATE_FREELIST] = getUpdateFreelist();
operationsTable[OPERATION_UPDATE_FREELIST_INVERSE] = getUpdateFreelistInverse();
operationsTable[OPERATION_FREE_PAGE] = getFreePageOperation();
operationsTable[OPERATION_ALLOC_FREED] = getAllocFreedPage();
operationsTable[OPERATION_UNALLOC_FREED] = getUnallocFreedPage();
}
@@ -58,6 +67,8 @@ int Tinit() {
openLogWriter();
pageOperationsInit();
InitiateRecovery();

View file

@@ -1,12 +1,12 @@
INCLUDES = @CHECK_CFLAGS@
if HAVE_CHECK
## Had to disable check_lht because lht needs to be rewritten.
TESTS = check_logEntry check_logWriter check_page check_operations check_transactional2 check_recovery check_blobRecovery check_bufferManager check_indirect check_lladdhash
TESTS = check_logEntry check_logWriter check_page check_operations check_transactional2 check_recovery check_blobRecovery check_bufferManager check_indirect check_lladdhash check_pageOperations
else
TESTS =
endif
noinst_PROGRAMS = $(TESTS)
LDADD = @CHECK_LIBS@ $(top_builddir)/src/lladd/liblladd.a $(top_builddir)/src/pbl/libpbl.a $(top_builddir)/src/libdfa/librw.a #-lefence
CLEANFILES = check_lht.log check_logEntry.log storefile.txt logfile.txt blob0_file.txt blob1_file.txt check_blobRecovery.log check_logWriter.log check_operations.log check_recovery.log check_transactional2.log check_page.log check_bufferManager.log check_indirect.log check_bufferMananger.log check_lladdhash.log
CLEANFILES = check_lht.log check_logEntry.log storefile.txt logfile.txt blob0_file.txt blob1_file.txt check_blobRecovery.log check_logWriter.log check_operations.log check_recovery.log check_transactional2.log check_page.log check_bufferManager.log check_indirect.log check_bufferMananger.log check_lladdhash.log check_pageOperations.log
AM_CFLAGS= -g -Wall -pedantic -std=gnu99

View file

@@ -278,7 +278,6 @@ START_TEST(indirectSizeTest) {
} END_TEST
Suite * check_suite(void) {
Suite *s = suite_create("indirect");
/* Begin a new test */

View file

@@ -58,11 +58,6 @@ terms specified in this license.
#define LOG_NAME "check_page.log"
#define RECORD_SIZE sizeof(int)
/** @todo check_page needs to use loadPage, so it contains its own
declaration of loadPage. (Otherwise, loadPage would clutter the
interface, which is especially bad, since we hide the Page struct
from the user for locking purposes.) */
Page * loadPage(int pageid);
pthread_mutex_t random_mutex;
static lsn_t lsn;

View file

@@ -0,0 +1,199 @@
/*---
This software is copyrighted by the Regents of the University of
California, and other parties. The following terms apply to all files
associated with the software unless explicitly disclaimed in
individual files.
The authors hereby grant permission to use, copy, modify, distribute,
and license this software and its documentation for any purpose,
provided that existing copyright notices are retained in all copies
and that this notice is included verbatim in any distributions. No
written agreement, license, or royalty fee is required for any of the
authorized uses. Modifications to this software may be copyrighted by
their authors and need not follow the licensing terms described here,
provided that the new terms are clearly indicated on the first page of
each file where they apply.
IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
NON-INFRINGEMENT. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, AND
THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
GOVERNMENT USE: If you are acquiring this software on behalf of the
U.S. government, the Government shall have only "Restricted Rights" in
the software and related documentation as defined in the Federal
Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you are
acquiring the software on behalf of the Department of Defense, the
software shall be classified as "Commercial Computer Software" and the
Government shall have only "Restricted Rights" as defined in Clause
252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
authors grant the U.S. Government and others acting in its behalf
permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
#include <config.h>
#include <check.h>
#include "../../src/lladd/page.h"
#include "../../src/lladd/page/slotted.h"
#include <lladd/bufferManager.h>
#include <lladd/transactional.h>
#include "../../src/lladd/latches.h"
#include <sched.h>
#include <assert.h>
#include "../check_includes.h"
#define LOG_NAME "check_pageOperations.log"
#include "../../src/lladd/logger/logWriter.h"
void simulateBufferManagerCrash();
extern int numActiveXactions;
START_TEST(pageOpCheckRecovery) {
Tinit();
int xid = Tbegin();
int pageid1 = TpageAlloc(xid);
int pageid2 = TpageAlloc(xid);
Page p;
byte memAddr[PAGE_SIZE];
p.memAddr = memAddr;
memset(p.memAddr, 1, PAGE_SIZE);
TpageSet(xid, pageid1, p.memAddr);
memset(p.memAddr, 2, PAGE_SIZE);
TpageSet(xid, pageid2, p.memAddr);
Tcommit(xid);
xid = Tbegin();
TpageAlloc(xid); /* This test doesn't check for leaks, so we don't need to remember this pageid. */
TpageDealloc(xid, pageid1);
TpageDealloc(xid, pageid2);
simulateBufferManagerCrash();
closeLogWriter();
numActiveXactions = 0;
Tinit();
xid = Tbegin();
int pageid3 = TpageAlloc(xid);
memset(p.memAddr, 3, PAGE_SIZE);
TpageSet(xid, pageid3, p.memAddr);
byte newAddr[PAGE_SIZE];
memset(p.memAddr, 1, PAGE_SIZE);
TpageGet(xid, pageid1, newAddr);
assert(!memcmp(p.memAddr, newAddr, PAGE_SIZE-4));
memset(p.memAddr, 2, PAGE_SIZE);
TpageGet(xid, pageid2, newAddr);
assert(!memcmp(p.memAddr, newAddr, PAGE_SIZE-4));
memset(p.memAddr, 3, PAGE_SIZE);
TpageGet(xid, pageid3, newAddr);
assert(!memcmp(p.memAddr, newAddr, PAGE_SIZE-4));
Tcommit(xid);
Tdeinit();
} END_TEST
/**
@test
*/
START_TEST(pageOpCheckAllocDealloc) {
#ifdef REUSE_PAGES
Tinit();
int xid = Tbegin();
int pageid = TpageAllocMany(xid, 100);
fail_unless(pageid == 1, NULL);
pageid = TpageAlloc(xid);
fail_unless(pageid == 101, NULL);
TpageDealloc(xid, 52);
pageid = TpageAlloc(xid);
fail_unless(pageid == 52, NULL);
printf("\nA\n"); fflush(NULL);
Tcommit(xid);
xid = Tbegin();
printf("\nEverything below this aborts\n"); fflush(NULL);
for(int i = 1; i < 102; i++) {
TpageDealloc(xid, i);
}
printf("\nB\n"); fflush(NULL);
for(int i = 0; i < 50; i++) {
pageid = TpageAlloc(xid);
assert(pageid < 102);
}
printf("\nC - aborting\n"); fflush(NULL);
Tabort(xid);
printf("\nD - aborted\n"); fflush(NULL);
xid = Tbegin();
pageid = TpageAlloc(xid);
printf("\nE\n"); fflush(NULL);
fail_unless(pageid == 102, NULL);
Tcommit(xid);
Tdeinit();
#else
printf(" Skipping 1 check for page leaks since page reuse is diabled.\n");
#endif
} END_TEST
Suite * check_suite(void) {
Suite *s = suite_create("pageOperations");
/* Begin a new test */
TCase *tc = tcase_create("pageOperations");
/* Sub tests are added, one per line, here */
tcase_add_test(tc, pageOpCheckAllocDealloc);
tcase_add_test(tc, pageOpCheckRecovery);
/* --------------------------------------------- */
tcase_add_checked_fixture(tc, setup, teardown);
suite_add_tcase(s, tc);
return s;
}
#include "../check_setup.h"