From 11c311bc9105822385a6ad69a44858fd279e7b5c Mon Sep 17 00:00:00 2001 From: Sears Russell Date: Wed, 12 Apr 2006 01:41:35 +0000 Subject: [PATCH] Added support for raw pages, implemented posix_memalign workaround, and re-enabled O_DIRECT by default --- benchmarks/Makefile.am | 2 +- benchmarks/zeroCopy.c | 55 ++++++++++++++++++++++++ lladd/logger/logger2.h | 2 + src/lladd/Makefile.am | 2 +- src/lladd/bufferPool.c | 34 +++++++-------- src/lladd/page.c | 33 +------------- src/lladd/page.h | 1 - src/lladd/page/raw.c | 40 +++++++++++++++++ src/lladd/page/raw.h | 98 ++++++++++++++++++++++++++++++++++++++++++ src/lladd/pageFile.c | 13 ++---- 10 files changed, 218 insertions(+), 62 deletions(-) create mode 100644 benchmarks/zeroCopy.c create mode 100644 src/lladd/page/raw.c create mode 100644 src/lladd/page/raw.h diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 1554eec..2dfcc8d 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -3,7 +3,7 @@ LDADD=$(top_builddir)/src/2pc/lib2pc.a $(top_builddir)/src/libdfa/libdfa.a \ $(top_builddir)/src/libdfa/librw.a bin_PROGRAMS=naiveHash logicalHash readLogicalHash naiveMultiThreaded logicalMultThreaded rawSet \ arrayListSet logicalMultiReaders linearHashNTA linkedListNTA pageOrientedListNTA \ - linearHashNTAThreaded linearHashNTAMultiReader linearHashNTAWriteRequests transitiveClosure + linearHashNTAThreaded linearHashNTAMultiReader linearHashNTAWriteRequests transitiveClosure zeroCopy AM_CFLAGS= -g -Wall -pedantic -std=gnu99 SUBDIRS=berkeleyDB diff --git a/benchmarks/zeroCopy.c b/benchmarks/zeroCopy.c new file mode 100644 index 0000000..ff8d900 --- /dev/null +++ b/benchmarks/zeroCopy.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include "../src/lladd/page/raw.h" +#include + +int main(int argc, char** argv) { + assert(argc == 3); + +#define ZEROCOPY 0 +#define LSNMODE 1 + + int mode = atoi(argv[1]); + + int count = atoi(argv[2]); + + int longsPerPage = PAGE_SIZE / sizeof(long); 
+ + if(ZEROCOPY == mode) { + printf("Running ZEROCOPY mode. Count = %d\n", count); + } + + Tinit(); + int xid = Tbegin(); + + long * buf = malloc(longsPerPage * sizeof(long)); + + for(int i = 0; i < count; i++) { + int pageNum = TpageAlloc(xid); + + Page * p = loadPage(xid, pageNum); + if(ZEROCOPY == mode) { + long * data = (long*) rawPageGetData(xid, p); + for(int j = 0; j < longsPerPage; j++) { + data[j] = j; + } + rawPageSetData(xid, 0, p); + } else if(LSNMODE == mode) { + long * data = (long*) rawPageGetData(xid, p); + memcpy(buf, data, PAGE_SIZE); + for(int j = 0; j < longsPerPage; j++) { + buf[j] = j; + } + memcpy(data, buf, PAGE_SIZE); + + rawPageSetData(xid, 0, p); + } + releasePage(p); + + } + Tcommit(xid); + Tdeinit(); +} diff --git a/lladd/logger/logger2.h b/lladd/logger/logger2.h index c274dc9..de39bb6 100644 --- a/lladd/logger/logger2.h +++ b/lladd/logger/logger2.h @@ -95,6 +95,8 @@ int LogDeinit(); void LogForce(lsn_t lsn); void LogTruncate(lsn_t lsn); +lsn_t LogFlushedLSN(); + lsn_t LogTruncationPoint(); diff --git a/src/lladd/Makefile.am b/src/lladd/Makefile.am index 494414e..06e43cc 100644 --- a/src/lladd/Makefile.am +++ b/src/lladd/Makefile.am @@ -8,7 +8,7 @@ liblladd_a_SOURCES=crc32.c common.c stats.c io.c bufferManager.c linkedlist.c op lockManager.c iterator.c consumer.c arrayCollection.c ringbuffer.c fifo.c multiplexer.c graph.c\ logger/logEntry.c logger/logWriter.c logger/inMemoryLog.c logger/logHandle.c logger/logger2.c \ logger/logMemory.c \ - page/slotted.c page/header.c page/fixed.c compensations.c \ + page/raw.c page/slotted.c page/header.c page/fixed.c compensations.c \ operations/pageOperations.c page/indirect.c operations/decrement.c \ operations/increment.c operations/prepare.c operations/set.c \ operations/alloc.c operations/noop.c operations/instantSet.c \ diff --git a/src/lladd/bufferPool.c b/src/lladd/bufferPool.c index 276a43f..959b4ff 100644 --- a/src/lladd/bufferPool.c +++ b/src/lladd/bufferPool.c @@ -57,6 +57,8 @@ terms 
specified in this license. static int nextPage = 0; static pthread_mutex_t pageMallocMutex; +static void * addressFromMalloc = 0; + /** We need one dummy page for locking purposes, so this array has one extra page in it. */ Page pool[MAX_BUFFER_SIZE+1]; @@ -67,34 +69,32 @@ void bufferPoolInit() { pthread_mutex_init(&pageMallocMutex, NULL); + byte * bufferSpace ; + +#ifdef HAVE_POSIX_MEMALIGN + int ret = posix_memalign((void*)&bufferSpace, PAGE_SIZE, PAGE_SIZE * (MAX_BUFFER_SIZE + 1)); + assert(!ret); + addressFromMalloc = bufferSpace; +#else + bufferSpace = malloc(PAGE_SIZE * (MAX_BUFFER_SIZE + 2)); + assert(bufferSpace); + addressFromMalloc = bufferSpace; + bufferSpace += PAGE_SIZE - (((unsigned long)bufferSpace) % PAGE_SIZE); /* advance to the next page boundary; the address is cast because % requires integer operands */ +#endif + for(int i = 0; i < MAX_BUFFER_SIZE+1; i++) { pool[i].rwlatch = initlock(); pool[i].loadlatch = initlock(); - /** @todo The buffer pool should be allocated as a contiguous - unit. This would also allow us to work around systems that - lack posix_memalign...*/ -#ifdef HAVE_POSIX_MEMALIGN - // Note that the backup behavior for posix_memalign breaks O_DIRECT. Therefore, O_DIRECT should be - // disabled whenever posix_memalign is not present. - int ret = posix_memalign((void*)(&(pool[i].memAddr)), PAGE_SIZE, PAGE_SIZE); - assert(!ret); -#else -//#warn Not using posix_memalign - pool[i].memAddr = malloc(PAGE_SIZE); - assert(pool[i].memAddr); -#endif + pool[i].memAddr = &(bufferSpace[i*PAGE_SIZE]); } - // pthread_mutex_init(&lastAllocedPage_mutex , NULL); - - // lastAllocedPage = 0; } void bufferPoolDeInit() { for(int i = 0; i < MAX_BUFFER_SIZE+1; i++) { deletelock(pool[i].rwlatch); deletelock(pool[i].loadlatch); - free(pool[i].memAddr); // breaks efence } + free(addressFromMalloc); // breaks efence pthread_mutex_destroy(&pageMallocMutex); } diff --git a/src/lladd/page.c b/src/lladd/page.c index 8f48ce1..0ff250e 100644 --- a/src/lladd/page.c +++ b/src/lladd/page.c @@ -91,15 +91,6 @@ terms 
#include void pageWriteLSN(int xid, Page * page, lsn_t lsn) { - /* unlocked since we're only called by a function that holds the writelock. */ - /* *(long *)(page->memAddr + START_OF_LSN) = page->LSN; */ - - /* tr y { - if(globalLockM anager.writ eLockPage) { - globalLock Manager.writeL ockPage(xid, page->id); - } - } en d; */ - if(page->LSN < lsn) { page->LSN = lsn; *lsn_ptr(page) = page->LSN; @@ -119,10 +110,6 @@ lsn_t pageReadLSN(const Page * page) { return ret; } - - -/* ----- end static functions ----- */ - /* ----- (de)initialization functions. Do not need to support multithreading. -----*/ /** @@ -150,14 +137,6 @@ void writeRecord(int xid, Page * p, lsn_t lsn, recordid rid, const void *dat) { assert( (p->id == rid.page) && (p->memAddr != NULL) ); - - /* writelock(p->rwlatch, 225); // Need a writelock so that we can update the lsn. - int lock_ret = pageWriteLSN(xid, p, lsn); - unlock(p->rwlatch); - if(lock_ret) { - return lock_ret; - } */ - writelock(p->rwlatch, 225); pageWriteLSN(xid, p, lsn); unlock(p->rwlatch); @@ -227,9 +206,8 @@ int getRecordTypeUnlocked(int xid, Page * p, recordid rid) { return UNINITIALIZED_RECORD; } else if(page_type == SLOTTED_PAGE) { - if(*numslots_ptr(p) <= rid.slot || *slot_ptr(p, rid.slot) == INVALID_SLOT /*|| *slot_length_ptr(p, rid.slot) == INVALID_SLOT*/) { + if(*numslots_ptr(p) <= rid.slot || *slot_ptr(p, rid.slot) == INVALID_SLOT) { return UNINITIALIZED_PAGE; - // } else if(*slot_length_ptr(p, rid.slot) == BLOB_REC_SIZE) { } else if (*slot_length_ptr(p, rid.slot) == BLOB_SLOT) { return BLOB_RECORD; } else { @@ -268,15 +246,6 @@ void writeRecordUnlocked(int xid, Page * p, lsn_t lsn, recordid rid, const void assert( (p->id == rid.page) && (p->memAddr != NULL) ); - /* writelock(p->rwlatch, 225); // Need a writelock so that we can update the lsn. 
- int lock_error = pageWriteLSN(xid, p, lsn); - if(lock_error) { - unlock(p->rwlatch); - return lock_error; - } - unlock(p->rwlatch); */ - - // Need a writelock so that we can update the lsn. writelock(p->rwlatch, 225); diff --git a/src/lladd/page.h b/src/lladd/page.h index a0917e3..808585a 100644 --- a/src/lladd/page.h +++ b/src/lladd/page.h @@ -112,7 +112,6 @@ BEGIN_C_DECLS struct Page_s { /** @todo Shouldn't Page.id be a long? */ int id; - /** @todo The Page.LSN field seems extraneous. Why do we need it? */ lsn_t LSN; byte *memAddr; byte dirty; diff --git a/src/lladd/page/raw.c b/src/lladd/page/raw.c new file mode 100644 index 0000000..b592389 --- /dev/null +++ b/src/lladd/page/raw.c @@ -0,0 +1,40 @@ +#include "page/raw.h" +#include +/** + @todo Should rawPageInferMetadata set a page type in the Page + struct? +*/ +void rawPageInferMetadata(Page * p) { + p->LSN = LogFlushedLSN(); +} + +byte* rawPageGetData(int xid, Page * p) { + return units_from_start_raw(byte, p, 0); +} + +void rawPageSetData(int xid, lsn_t lsn, Page * p) { + writelock(p->rwlatch, 255); + rawPageWriteLSN(xid, p, lsn); + p->dirty = 1; + unlock(p->rwlatch); + return; +} + +lsn_t rawPageReadLSN(const Page * p) { + // There are some potential optimizations here since the page + // doesn't "really" have an LSN at all, but we need to be careful + // about log truncation... + return p->LSN; +} + +void rawPageWriteLSN(int xid, Page * p, lsn_t lsn) { + if(p->LSN < lsn) { p->LSN = lsn; } +} + +void rawPageCommit(int xid) { + // no-op +} + +void rawPageAbort(int xid) { + // no-op +} diff --git a/src/lladd/page/raw.h b/src/lladd/page/raw.h new file mode 100644 index 0000000..1db9957 --- /dev/null +++ b/src/lladd/page/raw.h @@ -0,0 +1,98 @@ +/*--- +This software is copyrighted by the Regents of the University of +California, and other parties. The following terms apply to all files +associated with the software unless explicitly disclaimed in +individual files. 
+ +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, +provided that existing copyright notices are retained in all copies +and that this notice is included verbatim in any distributions. No +written agreement, license, or royalty fee is required for any of the +authorized uses. Modifications to this software may be copyrighted by +their authors and need not follow the licensing terms described here, +provided that the new terms are clearly indicated on the first page of +each file where they apply. + +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +NON-INFRINGEMENT. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, AND +THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO PROVIDE +MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" in +the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you are +acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. 
Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. +---*/ + +/** + * @file + * + * interface for dealing with raw pages + * + * This file provides a re-entrant interface for pages that do not + * contain any metadata that LLADD understands. + * + * @ingroup LLADD_CORE + * $Id$ + + STRUCTURE OF A RAW PAGE +
+ +----------------------------------------------------------------------+
+ |                                                                      |
+ |  USABLE SPACE                                                        |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ |                                                                      |
+ +----------------------------------------------------------------------+
+
+ */ + +#include +#include "../page.h" + +#define units_from_start_raw(unit,page,count) (((unit*)((page)->memAddr))+(count)) /* 'page' is parenthesized so any pointer expression can be passed */ + +/** + @return a pointer to the buffer memory that stores this page. +*/ +byte* rawPageGetData(int xid, Page * page); +/** + Inform the raw page implementation that the page is dirty. +*/ +void rawPageSetData(int xid, lsn_t lsn, Page * page); + +lsn_t rawPageReadLSN(const Page * page); + +void rawPageWriteLSN(int xid, Page * page, lsn_t lsn); + +/** Should be called when a transaction that has touched this page commits. */ +void rawPageCommit(int xid); +/** Should be called when a transaction that has touched this page aborts. */ +void rawPageAbort(int xid); diff --git a/src/lladd/pageFile.c b/src/lladd/pageFile.c index b16eded..0e34a1d 100644 --- a/src/lladd/pageFile.c +++ b/src/lladd/pageFile.c @@ -122,20 +122,13 @@ void pageWrite(Page * ret) { pthread_mutex_unlock(&stable_mutex); } -/** @todo O_DIRECT is broken on old (pre 2.6.2ish?) linux, so it's disabled until the build script can be improved. :( */ +/** @todo O_DIRECT is broken in older linuxes (eg 2.4). The build script should disable it on such platforms. */ void openPageFile() { DEBUG("Opening storefile.\n"); -#if HAVE_POSIX_MEMALIGN - // O_DIRECT is broken under linux 2.4.. - stable = open (STORE_FILE, O_CREAT | O_RDWR/* | O_DIRECT*/, S_IRWXU | S_IRWXG | S_IRWXO); - // printf("WARNING: LLADD IS USING O_DIRECT!!!\n"); -#else -//#warn Not using O_DIRECT - // If we don't have posix_memalign(), then we aren't aligning our pages in memory, and can't use O_DIRECT. - stable = open (STORE_FILE, O_CREAT | O_RDWR, S_IRWXU | S_IRWXG | S_IRWXO); -#endif + stable = open (STORE_FILE, O_CREAT | O_RDWR | O_DIRECT, S_IRWXU | S_IRWXG | S_IRWXO); + if(stable == -1) { perror("couldn't open storefile"); fflush(NULL);