Added support for raw pages, implemented posix_memalign workaround, and re-enabled O_DIRECT by default

This commit is contained in:
Sears Russell 2006-04-12 01:41:35 +00:00
parent 33cbbb0e2d
commit 11c311bc91
10 changed files with 218 additions and 62 deletions

View file

@ -3,7 +3,7 @@ LDADD=$(top_builddir)/src/2pc/lib2pc.a $(top_builddir)/src/libdfa/libdfa.a \
$(top_builddir)/src/libdfa/librw.a $(top_builddir)/src/libdfa/librw.a
bin_PROGRAMS=naiveHash logicalHash readLogicalHash naiveMultiThreaded logicalMultThreaded rawSet \ bin_PROGRAMS=naiveHash logicalHash readLogicalHash naiveMultiThreaded logicalMultThreaded rawSet \
arrayListSet logicalMultiReaders linearHashNTA linkedListNTA pageOrientedListNTA \ arrayListSet logicalMultiReaders linearHashNTA linkedListNTA pageOrientedListNTA \
linearHashNTAThreaded linearHashNTAMultiReader linearHashNTAWriteRequests transitiveClosure linearHashNTAThreaded linearHashNTAMultiReader linearHashNTAWriteRequests transitiveClosure zeroCopy
AM_CFLAGS= -g -Wall -pedantic -std=gnu99 AM_CFLAGS= -g -Wall -pedantic -std=gnu99
SUBDIRS=berkeleyDB SUBDIRS=berkeleyDB

55
benchmarks/zeroCopy.c Normal file
View file

@ -0,0 +1,55 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <lladd/transactional.h>
#include <unistd.h>
#include "../src/lladd/page/raw.h"
#include <string.h>
/**
   Benchmark driver for the raw page interface.

   Usage: zeroCopy <mode> <count>

   Allocates <count> pages inside a single transaction and fills each
   one with the sequence 0, 1, 2, ...  The mode selects how the page is
   filled:

     - ZEROCOPY (0): write directly into the buffer returned by
       rawPageGetData().
     - LSNMODE  (1): copy the page into a private buffer, fill the
       buffer, then copy it back.

   Any other mode still allocates, loads and releases the pages, but
   leaves their contents untouched.

   @return 0 on success, EXIT_FAILURE on bad usage or allocation failure.
*/
int main(int argc, char** argv) {
  enum { ZEROCOPY = 0, LSNMODE = 1 };

  /* Validate explicitly instead of with assert(): the check must survive
     NDEBUG builds, since argv[1] and argv[2] are dereferenced below. */
  if(argc != 3) {
    fprintf(stderr, "Usage: %s <mode (%d = zero copy, %d = copy through buffer)> <page count>\n",
            argc > 0 ? argv[0] : "zeroCopy", ZEROCOPY, LSNMODE);
    return EXIT_FAILURE;
  }

  int mode = atoi(argv[1]);
  int count = atoi(argv[2]);
  int longsPerPage = PAGE_SIZE / sizeof(long);

  if(ZEROCOPY == mode) {
    printf("Running ZEROCOPY mode. Count = %d\n", count);
  }

  /* The scratch buffer is copied to and from with PAGE_SIZE bytes, so it
     must hold a whole page even if PAGE_SIZE is not a multiple of
     sizeof(long). */
  long * buf = malloc(PAGE_SIZE);
  if(!buf) {
    perror("malloc");
    return EXIT_FAILURE;
  }

  Tinit();
  int xid = Tbegin();

  for(int i = 0; i < count; i++) {
    int pageNum = TpageAlloc(xid);
    Page * p = loadPage(xid, pageNum);

    if(ZEROCOPY == mode) {
      long * data = (long*) rawPageGetData(xid, p);
      for(int j = 0; j < longsPerPage; j++) {
        data[j] = j;
      }
      rawPageSetData(xid, 0, p);
    } else if(LSNMODE == mode) {
      long * data = (long*) rawPageGetData(xid, p);
      memcpy(buf, data, PAGE_SIZE);
      for(int j = 0; j < longsPerPage; j++) {
        buf[j] = j;
      }
      memcpy(data, buf, PAGE_SIZE);
      rawPageSetData(xid, 0, p);
    }

    releasePage(p);
  }

  Tcommit(xid);
  Tdeinit();

  free(buf);
  return 0;
}

View file

@ -95,6 +95,8 @@ int LogDeinit();
void LogForce(lsn_t lsn); void LogForce(lsn_t lsn);
void LogTruncate(lsn_t lsn); void LogTruncate(lsn_t lsn);
lsn_t LogFlushedLSN();
lsn_t LogTruncationPoint(); lsn_t LogTruncationPoint();

View file

@ -8,7 +8,7 @@ liblladd_a_SOURCES=crc32.c common.c stats.c io.c bufferManager.c linkedlist.c op
lockManager.c iterator.c consumer.c arrayCollection.c ringbuffer.c fifo.c multiplexer.c graph.c\ lockManager.c iterator.c consumer.c arrayCollection.c ringbuffer.c fifo.c multiplexer.c graph.c\
logger/logEntry.c logger/logWriter.c logger/inMemoryLog.c logger/logHandle.c logger/logger2.c \ logger/logEntry.c logger/logWriter.c logger/inMemoryLog.c logger/logHandle.c logger/logger2.c \
logger/logMemory.c \ logger/logMemory.c \
page/slotted.c page/header.c page/fixed.c compensations.c \ page/raw.c page/slotted.c page/header.c page/fixed.c compensations.c \
operations/pageOperations.c page/indirect.c operations/decrement.c \ operations/pageOperations.c page/indirect.c operations/decrement.c \
operations/increment.c operations/prepare.c operations/set.c \ operations/increment.c operations/prepare.c operations/set.c \
operations/alloc.c operations/noop.c operations/instantSet.c \ operations/alloc.c operations/noop.c operations/instantSet.c \

View file

@ -57,6 +57,8 @@ terms specified in this license.
static int nextPage = 0; static int nextPage = 0;
static pthread_mutex_t pageMallocMutex; static pthread_mutex_t pageMallocMutex;
static void * addressFromMalloc = 0;
/** We need one dummy page for locking purposes, so this array has one extra page in it. */ /** We need one dummy page for locking purposes, so this array has one extra page in it. */
Page pool[MAX_BUFFER_SIZE+1]; Page pool[MAX_BUFFER_SIZE+1];
@ -67,34 +69,32 @@ void bufferPoolInit() {
pthread_mutex_init(&pageMallocMutex, NULL); pthread_mutex_init(&pageMallocMutex, NULL);
/* A single contiguous, page-aligned region backs every frame in the pool. */
byte * bufferSpace ;
#ifdef HAVE_POSIX_MEMALIGN
int ret = posix_memalign((void*)&bufferSpace, PAGE_SIZE, PAGE_SIZE * (MAX_BUFFER_SIZE + 1));
assert(!ret);
addressFromMalloc = bufferSpace;
#else
/* No posix_memalign(): over-allocate by one page and round up by hand.
   addressFromMalloc keeps the pointer malloc() returned so that it can be
   passed to free() later. */
bufferSpace = malloc(PAGE_SIZE * (MAX_BUFFER_SIZE + 2));
assert(bufferSpace);
addressFromMalloc = bufferSpace;
/* The % operator is not defined on pointers, so compute the misalignment
   on the integer value of the address. */
bufferSpace += PAGE_SIZE - ((size_t)bufferSpace % PAGE_SIZE);
#endif
for(int i = 0; i < MAX_BUFFER_SIZE+1; i++) { for(int i = 0; i < MAX_BUFFER_SIZE+1; i++) {
pool[i].rwlatch = initlock(); pool[i].rwlatch = initlock();
pool[i].loadlatch = initlock(); pool[i].loadlatch = initlock();
/** @todo The buffer pool should be allocated as a contiguous pool[i].memAddr = &(bufferSpace[i*PAGE_SIZE]);
unit. This would also allow us to work around systems that
lack posix_memalign...*/
#ifdef HAVE_POSIX_MEMALIGN
// Note that the backup behavior for posix_memalign breaks O_DIRECT. Therefore, O_DIRECT should be
// disabled whenever posix_memalign is not present.
int ret = posix_memalign((void*)(&(pool[i].memAddr)), PAGE_SIZE, PAGE_SIZE);
assert(!ret);
#else
//#warn Not using posix_memalign
pool[i].memAddr = malloc(PAGE_SIZE);
assert(pool[i].memAddr);
#endif
} }
// pthread_mutex_init(&lastAllocedPage_mutex , NULL);
// lastAllocedPage = 0;
} }
void bufferPoolDeInit() { void bufferPoolDeInit() {
for(int i = 0; i < MAX_BUFFER_SIZE+1; i++) { for(int i = 0; i < MAX_BUFFER_SIZE+1; i++) {
deletelock(pool[i].rwlatch); deletelock(pool[i].rwlatch);
deletelock(pool[i].loadlatch); deletelock(pool[i].loadlatch);
free(pool[i].memAddr); // breaks efence
} }
free(addressFromMalloc); // breaks efence
pthread_mutex_destroy(&pageMallocMutex); pthread_mutex_destroy(&pageMallocMutex);
} }

View file

@ -91,15 +91,6 @@ terms specified in this license.
#include <lladd/bufferPool.h> #include <lladd/bufferPool.h>
void pageWriteLSN(int xid, Page * page, lsn_t lsn) { void pageWriteLSN(int xid, Page * page, lsn_t lsn) {
/* unlocked since we're only called by a function that holds the writelock. */
/* *(long *)(page->memAddr + START_OF_LSN) = page->LSN; */
/* tr y {
if(globalLockM anager.writ eLockPage) {
globalLock Manager.writeL ockPage(xid, page->id);
}
} en d; */
if(page->LSN < lsn) { if(page->LSN < lsn) {
page->LSN = lsn; page->LSN = lsn;
*lsn_ptr(page) = page->LSN; *lsn_ptr(page) = page->LSN;
@ -119,10 +110,6 @@ lsn_t pageReadLSN(const Page * page) {
return ret; return ret;
} }
/* ----- end static functions ----- */
/* ----- (de)initialization functions. Do not need to support multithreading. -----*/ /* ----- (de)initialization functions. Do not need to support multithreading. -----*/
/** /**
@ -150,14 +137,6 @@ void writeRecord(int xid, Page * p, lsn_t lsn, recordid rid, const void *dat) {
assert( (p->id == rid.page) && (p->memAddr != NULL) ); assert( (p->id == rid.page) && (p->memAddr != NULL) );
/* writelock(p->rwlatch, 225); // Need a writelock so that we can update the lsn.
int lock_ret = pageWriteLSN(xid, p, lsn);
unlock(p->rwlatch);
if(lock_ret) {
return lock_ret;
} */
writelock(p->rwlatch, 225); writelock(p->rwlatch, 225);
pageWriteLSN(xid, p, lsn); pageWriteLSN(xid, p, lsn);
unlock(p->rwlatch); unlock(p->rwlatch);
@ -227,9 +206,8 @@ int getRecordTypeUnlocked(int xid, Page * p, recordid rid) {
return UNINITIALIZED_RECORD; return UNINITIALIZED_RECORD;
} else if(page_type == SLOTTED_PAGE) { } else if(page_type == SLOTTED_PAGE) {
if(*numslots_ptr(p) <= rid.slot || *slot_ptr(p, rid.slot) == INVALID_SLOT /*|| *slot_length_ptr(p, rid.slot) == INVALID_SLOT*/) { if(*numslots_ptr(p) <= rid.slot || *slot_ptr(p, rid.slot) == INVALID_SLOT) {
return UNINITIALIZED_PAGE; return UNINITIALIZED_PAGE;
// } else if(*slot_length_ptr(p, rid.slot) == BLOB_REC_SIZE) {
} else if (*slot_length_ptr(p, rid.slot) == BLOB_SLOT) { } else if (*slot_length_ptr(p, rid.slot) == BLOB_SLOT) {
return BLOB_RECORD; return BLOB_RECORD;
} else { } else {
@ -268,15 +246,6 @@ void writeRecordUnlocked(int xid, Page * p, lsn_t lsn, recordid rid, const void
assert( (p->id == rid.page) && (p->memAddr != NULL) ); assert( (p->id == rid.page) && (p->memAddr != NULL) );
/* writelock(p->rwlatch, 225); // Need a writelock so that we can update the lsn.
int lock_error = pageWriteLSN(xid, p, lsn);
if(lock_error) {
unlock(p->rwlatch);
return lock_error;
}
unlock(p->rwlatch); */
// Need a writelock so that we can update the lsn. // Need a writelock so that we can update the lsn.
writelock(p->rwlatch, 225); writelock(p->rwlatch, 225);

View file

@ -112,7 +112,6 @@ BEGIN_C_DECLS
struct Page_s { struct Page_s {
/** @todo Shouldn't Page.id be a long? */ /** @todo Shouldn't Page.id be a long? */
int id; int id;
/** @todo The Page.LSN field seems extraneous. Why do we need it? */
lsn_t LSN; lsn_t LSN;
byte *memAddr; byte *memAddr;
byte dirty; byte dirty;

40
src/lladd/page/raw.c Normal file
View file

@ -0,0 +1,40 @@
#include "page/raw.h"
#include <lladd/logger/logger2.h>
/**
   Fill in the in-memory metadata of a raw page.  The page's LSN field
   is set to the value reported by LogFlushedLSN().

   @todo Should rawPageInferMetadata set a page type in the Page
   struct?
*/
void rawPageInferMetadata(Page * p) {
  lsn_t flushed = LogFlushedLSN();
  p->LSN = flushed;
}
/**
   @return a pointer to the first byte of the page's buffer memory
   (p->memAddr).  xid is not consulted.
*/
byte* rawPageGetData(int xid, Page * p) {
  byte * firstByte = units_from_start_raw(byte, p, 0);
  return firstByte;
}
/**
   Record that the page's contents have been modified: while holding the
   page's rwlatch for writing, flag the page dirty and advance its
   in-memory LSN to lsn if lsn is newer.
*/
void rawPageSetData(int xid, lsn_t lsn, Page * p) {
  writelock(p->rwlatch, 255);
  p->dirty = 1;
  rawPageWriteLSN(xid, p, lsn);
  unlock(p->rwlatch);
}
/**
   @return the LSN stored in the Page struct.

   Raw pages do not "really" carry an LSN of their own, so there is room
   for optimization here, but any such change has to be careful about
   log truncation.
*/
lsn_t rawPageReadLSN(const Page * p) {
  lsn_t current = p->LSN;
  return current;
}
/**
   Advance the LSN stored in the Page struct to lsn.  The stored value
   only moves forward: if it is not smaller than lsn, nothing changes.
   No latch is taken here.
*/
void rawPageWriteLSN(int xid, Page * p, lsn_t lsn) {
  if(!(p->LSN < lsn)) {
    return;
  }
  p->LSN = lsn;
}
/** Commit hook for raw pages.  Currently does nothing. */
void rawPageCommit(int xid) {
  (void)xid; /* intentionally unused: no-op */
}
/** Abort hook for raw pages.  Currently does nothing. */
void rawPageAbort(int xid) {
  (void)xid; /* intentionally unused: no-op */
}

98
src/lladd/page/raw.h Normal file
View file

@ -0,0 +1,98 @@
/*---
This software is copyrighted by the Regents of the University of
California, and other parties. The following terms apply to all files
associated with the software unless explicitly disclaimed in
individual files.
The authors hereby grant permission to use, copy, modify, distribute,
and license this software and its documentation for any purpose,
provided that existing copyright notices are retained in all copies
and that this notice is included verbatim in any distributions. No
written agreement, license, or royalty fee is required for any of the
authorized uses. Modifications to this software may be copyrighted by
their authors and need not follow the licensing terms described here,
provided that the new terms are clearly indicated on the first page of
each file where they apply.
IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
NON-INFRINGEMENT. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, AND
THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
GOVERNMENT USE: If you are acquiring this software on behalf of the
U.S. government, the Government shall have only "Restricted Rights" in
the software and related documentation as defined in the Federal
Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you are
acquiring the software on behalf of the Department of Defense, the
software shall be classified as "Commercial Computer Software" and the
Government shall have only "Restricted Rights" as defined in Clause
252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
authors grant the U.S. Government and others acting in its behalf
permission to use and distribute the software in accordance with the
terms specified in this license.
---*/
/**
* @file
*
* interface for dealing with raw pages
*
* This file provides a re-entrant interface for pages that do not
* contain any metadata that LLADD understands.
*
* @ingroup LLADD_CORE
* $Id$
STRUCTURE OF A RAW PAGE
<pre>
+----------------------------------------------------------------------+
| |
| USABLE SPACE |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+----------------------------------------------------------------------+
</pre>
*/
#include <lladd/common.h>
#include "../page.h"
/**
   Address of the count'th element of type unit, counting from the start
   of the page's buffer (page->memAddr).  page may be any expression that
   yields a pointer to a struct with a memAddr member; it is parenthesized
   so that arguments such as (pages + 1) or &pg expand correctly.
*/
#define units_from_start_raw(unit,page,count) (((unit*)((page)->memAddr))+(count))
/**
   @param xid  transaction id; not consulted by the current implementation.
   @param page the page whose buffer is wanted.
   @return a pointer to the buffer memory that stores this page.
*/
byte* rawPageGetData(int xid, Page * page);
/**
   Inform the raw page implementation that the page is dirty.

   The current implementation takes the page's rwlatch for writing,
   raises the page's in-memory LSN to lsn if lsn is larger, and sets the
   page's dirty flag.

   @param xid  transaction id; passed through to rawPageWriteLSN().
   @param lsn  LSN to associate with the modification.
   @param page the page that was modified.
*/
void rawPageSetData(int xid, lsn_t lsn, Page * page);
/**
   @return the LSN held in the Page struct's LSN field.
*/
lsn_t rawPageReadLSN(const Page * page);
/**
   Raise the LSN held in the Page struct to lsn.  The stored LSN is left
   alone unless it is smaller than lsn.  This function does not take the
   page latch itself; rawPageSetData() calls it while holding the write
   latch.
*/
void rawPageWriteLSN(int xid, Page * page, lsn_t lsn);
/** Should be called when a transaction that has touched this page commits.
    Currently a no-op. */
void rawPageCommit(int xid);
/** Should be called when a transaction that has touched this page aborts.
    Currently a no-op. */
void rawPageAbort(int xid);

View file

@ -122,20 +122,13 @@ void pageWrite(Page * ret) {
pthread_mutex_unlock(&stable_mutex); pthread_mutex_unlock(&stable_mutex);
} }
/** @todo O_DIRECT is broken on old (pre 2.6.2ish?) linux, so it's disabled until the build script can be improved. :( */ /** @todo O_DIRECT is broken in older linuxes (eg 2.4). The build script should disable it on such platforms. */
void openPageFile() { void openPageFile() {
DEBUG("Opening storefile.\n"); DEBUG("Opening storefile.\n");
#if HAVE_POSIX_MEMALIGN stable = open (STORE_FILE, O_CREAT | O_RDWR | O_DIRECT, S_IRWXU | S_IRWXG | S_IRWXO);
// O_DIRECT is broken under linux 2.4..
stable = open (STORE_FILE, O_CREAT | O_RDWR/* | O_DIRECT*/, S_IRWXU | S_IRWXG | S_IRWXO);
// printf("WARNING: LLADD IS USING O_DIRECT!!!\n");
#else
//#warn Not using O_DIRECT
// If we don't have posix_memalign(), then we aren't aligning our pages in memory, and can't use O_DIRECT.
stable = open (STORE_FILE, O_CREAT | O_RDWR, S_IRWXU | S_IRWXG | S_IRWXO);
#endif
if(stable == -1) { if(stable == -1) {
perror("couldn't open storefile"); perror("couldn't open storefile");
fflush(NULL); fflush(NULL);