92b7c3a6ef
that validate conversion to unsigned int and prints an error message on stderr if not used (would prefer to use SASL but it's as good as it gets during driver setup).
2203 lines
69 KiB
C
2203 lines
69 KiB
C
/* -------------------------------------------------------------------
|
|
*
|
|
* bdberl: Berkeley DB Driver for Erlang
|
|
* Copyright (c) 2008 The Hive. All rights reserved.
|
|
*
|
|
* ------------------------------------------------------------------- */
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <limits.h>
|
|
#include <time.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <errno.h>
|
|
#include <sys/time.h>
|
|
#include <sys/select.h>
|
|
#include <sys/statvfs.h>
|
|
|
|
#include "hive_hash.h"
|
|
#include "bdberl_drv.h"
|
|
#include "bdberl_stats.h"
|
|
#include "bin_helper.h"
|
|
|
|
/**
|
|
* Driver functions
|
|
*/
|
|
static ErlDrvData bdberl_drv_start(ErlDrvPort port, char* buffer);
|
|
|
|
static void bdberl_drv_stop(ErlDrvData handle);
|
|
|
|
static void bdberl_drv_finish();
|
|
|
|
static int bdberl_drv_control(ErlDrvData handle, unsigned int cmd,
|
|
char* inbuf, int inbuf_sz,
|
|
char** outbuf, int outbuf_sz);
|
|
|
|
/**
|
|
* Driver Entry
|
|
*/
|
|
ErlDrvEntry bdberl_drv_entry =
|
|
{
|
|
NULL, /* F_PTR init, N/A */
|
|
bdberl_drv_start, /* L_PTR start, called when port is opened */
|
|
bdberl_drv_stop, /* F_PTR stop, called when port is closed */
|
|
NULL, /* F_PTR output, called when erlang has sent */
|
|
NULL, /* F_PTR ready_input, called when input descriptor ready */
|
|
NULL, /* F_PTR ready_output, called when output descriptor ready */
|
|
"bdberl_drv", /* driver_name */
|
|
bdberl_drv_finish, /* F_PTR finish, called when unloaded */
|
|
NULL, /* handle */
|
|
bdberl_drv_control, /* F_PTR control, port_command callback */
|
|
NULL, /* F_PTR timeout, reserved */
|
|
NULL, /* F_PTR outputv, reserved */
|
|
NULL, /* F_PTR ready_async */
|
|
NULL, /* F_PTR flush */
|
|
NULL, /* F_PTR call */
|
|
NULL, /* F_PTR event */
|
|
ERL_DRV_EXTENDED_MARKER,
|
|
ERL_DRV_EXTENDED_MAJOR_VERSION,
|
|
ERL_DRV_EXTENDED_MINOR_VERSION,
|
|
ERL_DRV_FLAG_USE_PORT_LOCKING,
|
|
NULL, /* Reserved */
|
|
NULL /* F_PTR process_exit */
|
|
};
|
|
|
|
/**
|
|
* Function prototypes
|
|
*/
|
|
static int check_non_neg_env(char *env, unsigned int *val_ptr);
|
|
static int check_pos_env(char *env, unsigned int *val_ptr);
|
|
|
|
static int open_database(const char* name, DBTYPE type, unsigned int flags, PortData* data, int* dbref_res);
|
|
static int close_database(int dbref, unsigned flags, PortData* data);
|
|
static int delete_database(const char* name);
|
|
|
|
static void get_info(int target, void* values, BinHelper* bh);
|
|
|
|
static void do_async_put(void* arg);
|
|
static void do_async_get(void* arg);
|
|
static void do_async_txnop(void* arg);
|
|
static void do_async_cursor_get(void* arg);
|
|
static void do_async_truncate(void* arg);
|
|
static void do_sync_data_dirs_info(PortData *p);
|
|
static void do_sync_driver_info(PortData *d);
|
|
|
|
static int send_dir_info(ErlDrvPort port, ErlDrvTermData pid, const char *path);
|
|
|
|
static int add_dbref(PortData* data, int dbref);
|
|
static int del_dbref(PortData* data, int dbref);
|
|
|
|
static int add_portref(int dbref, ErlDrvPort port);
|
|
static int del_portref(int dbref, ErlDrvPort port);
|
|
|
|
static int alloc_dbref();
|
|
static void abort_txn(PortData* d);
|
|
|
|
static void* zalloc(unsigned int size);
|
|
|
|
static void* deadlock_check(void* arg);
|
|
static void* checkpointer(void* arg);
|
|
|
|
static void bdb_errcall(const DB_ENV* dbenv, const char* errpfx, const char* msg);
|
|
static void bdb_msgcall(const DB_ENV* dbenv, const char* msg);
|
|
static void send_log_message(ErlDrvTermData* msg, int elements);
|
|
|
|
/**
|
|
* Global instance of DB_ENV; only a single one exists per O/S process.
|
|
*/
|
|
static DB_ENV* G_DB_ENV = 0;
|
|
|
|
|
|
/**
|
|
* Global variable to track the return code from opening the DB_ENV. We track this
|
|
* value so as to provide a useful error code when the user attempts to open the
|
|
* port and it fails due to an error that occurred when opening the environment.
|
|
*/
|
|
static int G_DB_ENV_ERROR = 0;
|
|
|
|
|
|
/**
|
|
* G_DATABASES is a global array of Database structs. Used to track currently opened DB*
|
|
* handles and ensure that they get cleaned up when all ports which were using them exit or
|
|
* explicitly close them.
|
|
*
|
|
* This array is allocated when the driver is first initialized and does not grow/shrink
|
|
* dynamically. G_DATABASES_SIZE contains the size of the array. G_DATABASES_NAMES is a hash of
|
|
* filenames to array index for an opened Database.
|
|
*
|
|
* All access to G_DATABASES and G_DATABASES_NAMES must be protected by the read/write lock
|
|
* G_DATABASES_RWLOCK.
|
|
*/
|
|
static Database* G_DATABASES = 0;
|
|
static unsigned int G_DATABASES_SIZE = 0;
|
|
static ErlDrvRWLock* G_DATABASES_RWLOCK = 0;
|
|
static hive_hash* G_DATABASES_NAMES = 0;
|
|
|
|
|
|
/**
|
|
* Deadlock detector thread variables. We run a single thread per VM to detect deadlocks within
|
|
* our global environment. G_DEADLOCK_CHECK_INTERVAL is the time between runs in milliseconds.
|
|
*/
|
|
static ErlDrvTid G_DEADLOCK_THREAD = 0;
|
|
static unsigned int G_DEADLOCK_CHECK_ACTIVE = 1;
|
|
static unsigned int G_DEADLOCK_CHECK_INTERVAL = 100; /* 100 milliseconds */
|
|
|
|
|
|
/**
|
|
* Trickle writer for dirty pages. We run a single thread per VM to perform background
|
|
* trickling of dirty pages to disk. G_TRICKLE_INTERVAL is the time between runs in seconds.
|
|
*/
|
|
static unsigned int G_TRICKLE_ACTIVE = 1;
|
|
static unsigned int G_TRICKLE_INTERVAL = 60 * 5; /* Seconds between trickle writes */
|
|
static unsigned int G_TRICKLE_PERCENTAGE = 50; /* Desired % of clean pages in cache */
|
|
|
|
|
|
/**
|
|
* Transaction checkpoint monitor. We run a single thread per VM to flush transaction
|
|
* logs into the backing data store. G_CHECKPOINT_INTERVAL is the time between runs in seconds.
|
|
* TODO The interval should be configurable.
|
|
*/
|
|
static ErlDrvTid G_CHECKPOINT_THREAD = 0;
|
|
static unsigned int G_CHECKPOINT_ACTIVE = 1;
|
|
static unsigned int G_CHECKPOINT_INTERVAL = 60 * 60; /* Seconds between checkpoints */
|
|
|
|
/**
|
|
* Pipe to used to wake up the various monitors. Instead of just sleeping
|
|
* they wait for an exceptional condition on the read fd of the pipe. When it is time to
|
|
* shutdown, the driver closes the write fd and waits for the threads to be joined.
|
|
*/
|
|
static int G_BDBERL_PIPE[2] = {-1, -1};
|
|
|
|
/**
|
|
* Lock, port and pid reference for relaying BDB output into the SASL logger. READ lock
|
|
* is required to log data. WRITE lock is used when replacing the pid/port reference. If
|
|
* no pid/port is available, no callback is registered with BDB.
|
|
*/
|
|
static ErlDrvRWLock* G_LOG_RWLOCK = 0;
|
|
static ErlDrvTermData G_LOG_PID;
|
|
static ErlDrvPort G_LOG_PORT;
|
|
|
|
/**
|
|
* Default page size to use for newly created databases
|
|
*/
|
|
static unsigned int G_PAGE_SIZE = 0;
|
|
|
|
/** Thread pools
|
|
*
|
|
*/
|
|
static unsigned int G_NUM_GENERAL_THREADS = 10;
|
|
static unsigned int G_NUM_TXN_THREADS = 10;
|
|
static TPool* G_TPOOL_GENERAL = NULL;
|
|
static TPool* G_TPOOL_TXNS = NULL;
|
|
|
|
|
|
/**
|
|
* Helpful macros
|
|
*/
|
|
#define READ_LOCK(L) erl_drv_rwlock_rlock(L)
|
|
#define READ_UNLOCK(L) erl_drv_rwlock_runlock(L)
|
|
#define PROMOTE_READ_LOCK(L) erl_drv_rwlock_runlock(L); erl_drv_rwlock_rwlock(L)
|
|
#define WRITE_LOCK(L) erl_drv_rwlock_rwlock(L)
|
|
#define WRITE_UNLOCK(L) erl_drv_rwlock_rwunlock(L)
|
|
|
|
#ifdef DEBUG
|
|
# define DBG(...) fprintf(stderr, __VA_ARGS__)
|
|
static void DBGCMD(PortData *d, const char *fmt, ...);
|
|
static void DBGCMDRC(PortData *d, int rc);
|
|
#else
|
|
# define DBG(arg1,...)
|
|
# define DBGCMD(d, fmt, ...)
|
|
# define DBGCMDRC(d, rc) { while (0) { rc++; } } // otherwise get unused variable error
|
|
#endif
|
|
|
|
|
|
DRIVER_INIT(bdberl_drv)
|
|
{
|
|
DBG("DRIVER INIT\r\n");
|
|
// Setup flags we'll use to init the environment
|
|
int flags =
|
|
DB_INIT_LOCK | /* Enable support for locking */
|
|
DB_INIT_TXN | /* Enable support for transactions */
|
|
DB_INIT_MPOOL | /* Enable support for memory pools */
|
|
DB_RECOVER | /* Enable support for recovering from failures */
|
|
DB_CREATE | /* Create files as necessary */
|
|
DB_REGISTER | /* Run recovery if needed */
|
|
DB_USE_ENVIRON | /* Use DB_HOME environment variable */
|
|
DB_THREAD; /* Make the environment free-threaded */
|
|
|
|
// Check for environment flag which indicates we want to use DB_SYSTEM_MEM
|
|
char value[1];
|
|
size_t value_size = sizeof(value);
|
|
if (erl_drv_getenv("BDBERL_SYSTEM_MEM", value, &value_size) >= 0)
|
|
{
|
|
flags |= DB_SYSTEM_MEM;
|
|
}
|
|
|
|
// Initialize global environment -- use environment variable DB_HOME to
|
|
// specify where the working directory is
|
|
DBG("db_env_create(%p, 0)", &G_DB_ENV);
|
|
G_DB_ENV_ERROR = db_env_create(&G_DB_ENV, 0);
|
|
DBG(" = %d\r\n", G_DB_ENV_ERROR);
|
|
if (G_DB_ENV_ERROR != 0)
|
|
{
|
|
G_DB_ENV = 0;
|
|
}
|
|
else
|
|
{
|
|
DBG("G_DB_ENV->open(%p, 0, %08X, 0)", &G_DB_ENV, flags);
|
|
G_DB_ENV_ERROR = G_DB_ENV->open(G_DB_ENV, 0, flags, 0);
|
|
DBG(" = %d\r\n", G_DB_ENV_ERROR);
|
|
if (G_DB_ENV_ERROR != 0)
|
|
{
|
|
// Something bad happened while initializing BDB; in this situation we
|
|
// cleanup and set the environment to zero. Attempts to open ports will
|
|
// fail and the user will have to sort out how to resolve the issue.
|
|
DBG("G_DB_ENV->close(%p, 0);\r\n", &G_DB_ENV);
|
|
G_DB_ENV->close(G_DB_ENV, 0);
|
|
G_DB_ENV = 0;
|
|
}
|
|
}
|
|
|
|
if (G_DB_ENV_ERROR == 0)
|
|
{
|
|
// Pipe for signalling the utility threads all is over.
|
|
assert(0 == pipe(G_BDBERL_PIPE));
|
|
|
|
// Use the BDBERL_MAX_DBS environment value to determine the max # of
|
|
// databases to permit the VM to open at once. Defaults to 1024.
|
|
G_DATABASES_SIZE = 1024;
|
|
check_pos_env("BDBERL_MAX_DBS", &G_DATABASES_SIZE);
|
|
|
|
// Use the BDBERL_TRICKLE_TIME and BDBERL_TRICKLE_PERCENTAGE to control how often
|
|
// the trickle writer runs and what percentage of pages should be flushed.
|
|
check_pos_env("BDBERL_TRICKLE_TIME", &G_TRICKLE_INTERVAL);
|
|
check_pos_env("BDBERL_TRICKLE_PERCENTAGE", &G_TRICKLE_PERCENTAGE);
|
|
|
|
// Set the deadlock interval
|
|
check_pos_env("BDBERL_DEADLOCK_CHECK_INTERVAL", &G_DEADLOCK_CHECK_INTERVAL);
|
|
|
|
// Initialize default page size
|
|
unsigned int page_size;
|
|
if (check_pos_env("BDBERL_PAGE_SIZE", &page_size))
|
|
{
|
|
if (page_size != 0 && ((page_size & (~page_size +1)) == page_size))
|
|
{
|
|
G_PAGE_SIZE = page_size;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr, "Ignoring \"BDBERL_PAGE_SIZE\" value %u - not power of 2\r\n",
|
|
page_size);
|
|
}
|
|
}
|
|
|
|
// Make sure we can distiguish between lock timeouts and deadlocks
|
|
G_DB_ENV->set_flags(G_DB_ENV, DB_TIME_NOTGRANTED, 1);
|
|
|
|
// Initialization transaction timeout so that deadlock checking works properly
|
|
db_timeout_t to = 500 * 1000; // 500 ms
|
|
G_DB_ENV->set_timeout(G_DB_ENV, to, DB_SET_TXN_TIMEOUT);
|
|
|
|
// BDB is setup -- allocate structures for tracking databases
|
|
G_DATABASES = (Database*) driver_alloc(sizeof(Database) * G_DATABASES_SIZE);
|
|
memset(G_DATABASES, '\0', sizeof(Database) * G_DATABASES_SIZE);
|
|
G_DATABASES_RWLOCK = erl_drv_rwlock_create("bdberl_drv: G_DATABASES_RWLOCK");
|
|
G_DATABASES_NAMES = hive_hash_new(G_DATABASES_SIZE);
|
|
|
|
// Startup deadlock check thread
|
|
erl_drv_thread_create("bdberl_drv_deadlock_checker", &G_DEADLOCK_THREAD,
|
|
&deadlock_check, 0, 0);
|
|
|
|
// Use the BDBERL_CHECKPOINT_TIME environment value to determine the
|
|
// interval between transaction checkpoints. Defaults to 1 hour.
|
|
check_pos_env("BDBERL_CHECKPOINT_TIME", &G_CHECKPOINT_INTERVAL);
|
|
|
|
// Startup checkpoint thread
|
|
erl_drv_thread_create("bdberl_drv_checkpointer", &G_CHECKPOINT_THREAD,
|
|
&checkpointer, 0, 0);
|
|
|
|
// Startup our thread pools
|
|
check_pos_env("BDBERL_NUM_GENERAL_THREADS", &G_NUM_GENERAL_THREADS);
|
|
G_TPOOL_GENERAL = bdberl_tpool_start(G_NUM_GENERAL_THREADS);
|
|
|
|
check_pos_env("BDBERL_NUM_TXN_THREADS", &G_NUM_TXN_THREADS);
|
|
G_TPOOL_TXNS = bdberl_tpool_start(G_NUM_TXN_THREADS);
|
|
|
|
// Initialize logging lock and refs
|
|
G_LOG_RWLOCK = erl_drv_rwlock_create("bdberl_drv: G_LOG_RWLOCK");
|
|
G_LOG_PORT = 0;
|
|
G_LOG_PID = 0;
|
|
}
|
|
else
|
|
{
|
|
DBG("DRIVER INIT FAILED - %s\r\n", db_strerror(G_DB_ENV_ERROR));
|
|
}
|
|
|
|
return &bdberl_drv_entry;
|
|
}
|
|
|
|
static ErlDrvData bdberl_drv_start(ErlDrvPort port, char* buffer)
|
|
{
|
|
DBG("threadid %p port %p: BDB DRIVER STARTING\r\n",
|
|
erl_drv_thread_self(), port);
|
|
|
|
// Make sure we have a functional environment -- if we don't,
|
|
// bail...
|
|
if (!G_DB_ENV)
|
|
{
|
|
return ERL_DRV_ERROR_BADARG;
|
|
}
|
|
|
|
PortData* d = (PortData*)driver_alloc(sizeof(PortData));
|
|
memset(d, '\0', sizeof(PortData));
|
|
|
|
// Save handle to the port
|
|
d->port = port;
|
|
|
|
// Allocate a mutex for the port
|
|
d->port_lock = erl_drv_mutex_create("bdberl_port_lock");
|
|
|
|
// Save the caller/owner PID
|
|
d->port_owner = driver_connected(port);
|
|
|
|
// Allocate an initial buffer for work purposes
|
|
d->work_buffer = driver_alloc(4096);
|
|
d->work_buffer_sz = 4096;
|
|
|
|
// Make sure port is running in binary mode
|
|
set_port_control_flags(port, PORT_CONTROL_FLAG_BINARY);
|
|
|
|
DBGCMD(d, "BDB DRIVER STARTED");
|
|
|
|
return (ErlDrvData)d;
|
|
}
|
|
|
|
static void bdberl_drv_stop(ErlDrvData handle)
|
|
{
|
|
PortData* d = (PortData*)handle;
|
|
|
|
DBG("Stopping port %p\r\n", d->port);
|
|
|
|
// Grab the port lock, in case we have an async job running
|
|
erl_drv_mutex_lock(d->port_lock);
|
|
|
|
// If there is an async job pending, we need to cancel it. The cancel operation will
|
|
// block until the job has either been removed or has run
|
|
if (d->async_job)
|
|
{
|
|
// Drop the lock prior to starting the wait for the async process
|
|
erl_drv_mutex_unlock(d->port_lock);
|
|
|
|
DBG("Cancelling async job for port: %p\r\n", d->port);
|
|
bdberl_tpool_cancel(d->async_pool, d->async_job);
|
|
DBG("Canceled async job for port: %p\r\n", d->port);
|
|
}
|
|
else
|
|
{
|
|
// If there was no async job, drop the lock -- not needed
|
|
erl_drv_mutex_unlock(d->port_lock);
|
|
}
|
|
|
|
// Cleanup the port lock
|
|
erl_drv_mutex_destroy(d->port_lock);
|
|
|
|
// If a cursor is open, close it
|
|
if (d->cursor)
|
|
{
|
|
d->cursor->close(d->cursor);
|
|
}
|
|
|
|
// If a txn is currently active, terminate it.
|
|
abort_txn(d);
|
|
|
|
// Close all the databases we previously opened
|
|
while (d->dbrefs)
|
|
{
|
|
close_database(d->dbrefs->dbref, 0, d);
|
|
}
|
|
|
|
// If this port was registered as the endpoint for logging, go ahead and
|
|
// remove it. Note that we don't need to lock to check this since we only
|
|
// unregister if it's already initialized to this port.
|
|
if (G_LOG_PORT == d->port)
|
|
{
|
|
WRITE_LOCK(G_LOG_RWLOCK);
|
|
|
|
// Remove the references
|
|
G_LOG_PORT = 0;
|
|
G_LOG_PID = 0;
|
|
|
|
// Unregister with BDB -- MUST DO THIS WITH WRITE LOCK HELD!
|
|
G_DB_ENV->set_msgcall(G_DB_ENV, 0);
|
|
G_DB_ENV->set_errcall(G_DB_ENV, 0);
|
|
|
|
WRITE_UNLOCK(G_LOG_RWLOCK);
|
|
}
|
|
|
|
DBG("Stopped port: %p\r\n", d->port);
|
|
|
|
// Release the port instance data
|
|
driver_free(d->work_buffer);
|
|
driver_free(handle);
|
|
}
|
|
|
|
static void bdberl_drv_finish()
|
|
{
|
|
DBG("BDB DRIVER FINISHING\r\n");
|
|
// Stop the thread pools
|
|
if (NULL != G_TPOOL_GENERAL)
|
|
{
|
|
bdberl_tpool_stop(G_TPOOL_GENERAL);
|
|
G_TPOOL_GENERAL = NULL;
|
|
}
|
|
|
|
if (NULL != G_TPOOL_TXNS)
|
|
{
|
|
bdberl_tpool_stop(G_TPOOL_TXNS);
|
|
G_TPOOL_TXNS = NULL;
|
|
}
|
|
|
|
// Signal the utility threads time is up
|
|
G_TRICKLE_ACTIVE = 0;
|
|
G_DEADLOCK_CHECK_ACTIVE = 0;
|
|
G_CHECKPOINT_ACTIVE = 0;
|
|
|
|
// Close the writer fd on the pipe to signal finish to the utility threads
|
|
if (-1 != G_BDBERL_PIPE[1])
|
|
{
|
|
close(G_BDBERL_PIPE[1]);
|
|
G_BDBERL_PIPE[1] = -1;
|
|
}
|
|
|
|
// Wait for the deadlock checker to shutdown -- then wait for it
|
|
if (0 != G_DEADLOCK_THREAD)
|
|
{
|
|
erl_drv_thread_join(G_DEADLOCK_THREAD, 0);
|
|
G_DEADLOCK_THREAD = 0;
|
|
}
|
|
|
|
// Wait for the checkpointer to shutdown -- then wait for it
|
|
if (0 != G_CHECKPOINT_THREAD)
|
|
{
|
|
erl_drv_thread_join(G_CHECKPOINT_THREAD, 0);
|
|
G_CHECKPOINT_THREAD = 0;
|
|
}
|
|
|
|
// Close the reader fd on the pipe now utility threads are closed
|
|
if (-1 != G_BDBERL_PIPE[0])
|
|
{
|
|
close(G_BDBERL_PIPE[0]);
|
|
}
|
|
G_BDBERL_PIPE[0] = -1;
|
|
|
|
// Cleanup and shut down the BDB environment. Note that we assume
|
|
// all ports have been released and thuse all databases/txns/etc are also gone.
|
|
if (NULL != G_DB_ENV)
|
|
{
|
|
G_DB_ENV->close(G_DB_ENV, 0);
|
|
G_DB_ENV = NULL;
|
|
}
|
|
if (NULL != G_DATABASES)
|
|
{
|
|
driver_free(G_DATABASES);
|
|
G_DATABASES = NULL;
|
|
}
|
|
if (NULL != G_DATABASES_RWLOCK)
|
|
{
|
|
erl_drv_rwlock_destroy(G_DATABASES_RWLOCK);
|
|
G_DATABASES_RWLOCK = NULL;
|
|
}
|
|
|
|
if (NULL != G_DATABASES_NAMES)
|
|
{
|
|
hive_hash_destroy(G_DATABASES_NAMES);
|
|
G_DATABASES_NAMES = NULL;
|
|
}
|
|
|
|
// Release the logging rwlock
|
|
if (NULL != G_LOG_RWLOCK)
|
|
{
|
|
erl_drv_rwlock_destroy(G_LOG_RWLOCK);
|
|
G_LOG_RWLOCK = NULL;
|
|
}
|
|
|
|
DBG("BDB DRIVER FINISHED\r\n");
|
|
}
|
|
|
|
static int bdberl_drv_control(ErlDrvData handle, unsigned int cmd,
|
|
char* inbuf, int inbuf_sz,
|
|
char** outbuf, int outbuf_sz)
|
|
{
|
|
PortData* d = (PortData*)handle;
|
|
switch(cmd)
|
|
{
|
|
case CMD_OPEN_DB:
|
|
{
|
|
// Extract the type code and filename from the inbuf
|
|
// Inbuf is: <<Flags:32/unsigned, Type:8, Name/bytes, 0:8>>
|
|
unsigned flags = UNPACK_INT(inbuf, 0);
|
|
DBTYPE type = (DBTYPE) UNPACK_BYTE(inbuf, 4);
|
|
char* name = UNPACK_STRING(inbuf, 5);
|
|
int dbref;
|
|
int rc = open_database(name, type, flags, d, &dbref);
|
|
|
|
// Queue up a message for bdberl:open to process
|
|
if (rc == 0) // success: send {ok, DbRef}
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("ok"),
|
|
ERL_DRV_INT, dbref,
|
|
ERL_DRV_TUPLE, 2};
|
|
driver_send_term(d->port, d->port_owner,
|
|
response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
else // failure: send {error, atom() | {error, {unknown, Rc}}
|
|
{
|
|
bdberl_send_rc(d->port, d->port_owner, rc);
|
|
}
|
|
// Outbuf is: <<Rc:32>> - always send 0 and the driver will receive the real value
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_CLOSE_DB:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_CURSOR_OPEN(d, outbuf);
|
|
FAIL_IF_TXN_OPEN(d, outbuf);
|
|
|
|
// Take the provided dbref and attempt to close it
|
|
// Inbuf is: <<DbRef:32, Flags:32/unsigned>>
|
|
int dbref = UNPACK_INT(inbuf, 0);
|
|
unsigned flags = (unsigned) UNPACK_INT(inbuf, 4);
|
|
|
|
int rc = close_database(dbref, flags, d);
|
|
|
|
// Queue up a message for bdberl:close to process
|
|
bdberl_send_rc(d->port, d->port_owner, rc);
|
|
// Outbuf is: <<Rc:32>> - always send 0 and the driver will receive the real value
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_TXN_BEGIN:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_TXN_OPEN(d, outbuf);
|
|
|
|
// Setup async command and schedule it on the txns threadpool
|
|
d->async_op = cmd;
|
|
d->async_flags = UNPACK_INT(inbuf, 0);
|
|
bdberl_txn_tpool_run(&do_async_txnop, d, 0, &d->async_job);
|
|
|
|
// Outbuf is <<Rc:32>>
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_TXN_COMMIT:
|
|
case CMD_TXN_ABORT:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_NO_TXN(d, outbuf);
|
|
|
|
// Setup async command and schedule it on the txns threadpool
|
|
d->async_op = cmd;
|
|
if (cmd == CMD_TXN_COMMIT)
|
|
{
|
|
d->async_flags = UNPACK_INT(inbuf, 0);
|
|
}
|
|
bdberl_txn_tpool_run(&do_async_txnop, d, 0, &d->async_job);
|
|
|
|
// Outbuf is <<Rc:32>>
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_PUT:
|
|
case CMD_GET:
|
|
case CMD_PUT_COMMIT:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
|
|
// Put/commit requires a transaction to be active
|
|
if (cmd == CMD_PUT_COMMIT && (!d->txn))
|
|
{
|
|
bdberl_send_rc(d->port, d->port_owner, ERROR_NO_TXN);
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
|
|
// Inbuf is: << DbRef:32, Rest/binary>>
|
|
int dbref = UNPACK_INT(inbuf, 0);
|
|
|
|
// Make sure this port currently has dbref open -- if it doesn't, error out. Of note,
|
|
// if it's in our list, we don't need to grab the RWLOCK, as we don't have to worry about
|
|
// the underlying handle disappearing since we have a reference.
|
|
if (bdberl_has_dbref(d, dbref))
|
|
{
|
|
// If the working buffer is large enough, copy the data to put/get into it. Otherwise, realloc
|
|
// until it is large enough
|
|
if (d->work_buffer_sz < inbuf_sz)
|
|
{
|
|
d->work_buffer = driver_realloc(d->work_buffer, inbuf_sz);
|
|
d->work_buffer_sz = inbuf_sz;
|
|
}
|
|
|
|
// Copy the payload into place
|
|
memcpy(d->work_buffer, inbuf, inbuf_sz);
|
|
d->work_buffer_offset = inbuf_sz;
|
|
|
|
// Mark the port as busy and then schedule the appropriate async operation
|
|
d->async_op = cmd;
|
|
TPoolJobFunc fn;
|
|
if (cmd == CMD_PUT || cmd == CMD_PUT_COMMIT)
|
|
{
|
|
fn = &do_async_put;
|
|
}
|
|
else
|
|
{
|
|
assert(cmd == CMD_GET);
|
|
fn = &do_async_get;
|
|
}
|
|
bdberl_general_tpool_run(fn, d, 0, &d->async_job);
|
|
|
|
// Let caller know that the operation is in progress
|
|
// Outbuf is: <<0:32>>
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
else
|
|
{
|
|
// Invalid dbref
|
|
bdberl_send_rc(d->port, d->port_owner, ERROR_INVALID_DBREF);
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
}
|
|
case CMD_GETINFO:
|
|
{
|
|
// Inbuf is: << Target:32, Values/binary >>
|
|
int target = UNPACK_INT(inbuf, 0);
|
|
char* values = UNPACK_BLOB(inbuf, 4);
|
|
|
|
// Execute the tuning -- the result to send back to the caller is wrapped
|
|
// up in the provided binhelper
|
|
BinHelper bh;
|
|
get_info(target, values, &bh);
|
|
RETURN_BH(bh, outbuf);
|
|
}
|
|
case CMD_CURSOR_OPEN:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_CURSOR_OPEN(d, outbuf);
|
|
|
|
// Inbuf is << DbRef:32, Flags:32 >>
|
|
int dbref = UNPACK_INT(inbuf, 0);
|
|
unsigned int flags = UNPACK_INT(inbuf, 4);
|
|
|
|
// Make sure we have a reference to the requested database
|
|
if (bdberl_has_dbref(d, dbref))
|
|
{
|
|
// Grab the database handle and open the cursor
|
|
DB* db = G_DATABASES[dbref].db;
|
|
int rc = db->cursor(db, d->txn, &(d->cursor), flags);
|
|
bdberl_send_rc(d->port, d->port_owner, rc);
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
else
|
|
{
|
|
bdberl_send_rc(d->port, d->port_owner, ERROR_INVALID_DBREF);
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
}
|
|
case CMD_CURSOR_CURR:
|
|
case CMD_CURSOR_NEXT:
|
|
case CMD_CURSOR_PREV:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_NO_CURSOR(d, outbuf);
|
|
|
|
// Schedule the operation
|
|
d->async_op = cmd;
|
|
bdberl_general_tpool_run(&do_async_cursor_get, d, 0, &d->async_job);
|
|
|
|
// Let caller know operation is in progress
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_CURSOR_CLOSE:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_NO_CURSOR(d, outbuf);
|
|
|
|
// It's possible to get a deadlock when closing a cursor -- in that situation we also
|
|
// need to go ahead and abort the txn
|
|
int rc = d->cursor->close(d->cursor);
|
|
if (d->txn && (rc == DB_LOCK_NOTGRANTED || rc == DB_LOCK_DEADLOCK))
|
|
{
|
|
abort_txn(d);
|
|
}
|
|
|
|
// Regardless of what happens, clear out the cursor pointer
|
|
d->cursor = 0;
|
|
|
|
// Send result code
|
|
bdberl_send_rc(d->port, d->port_owner, rc);
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_REMOVE_DB:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_TXN_OPEN(d, outbuf);
|
|
|
|
// Inbuf is: << dbname/bytes, 0:8 >>
|
|
const char* dbname = UNPACK_STRING(inbuf, 0);
|
|
int rc = delete_database(dbname);
|
|
bdberl_send_rc(d->port, d->port_owner, rc);
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_TRUNCATE:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
FAIL_IF_CURSOR_OPEN(d, outbuf);
|
|
|
|
// Inbuf is: <<DbRef:32>>
|
|
int dbref = UNPACK_INT(inbuf, 0);
|
|
|
|
// Make sure this port currently has dbref open -- if it doesn't, error out. Of note,
|
|
// if it's in our list, we don't need to grab the RWLOCK, as we don't have to worry about
|
|
// the underlying handle disappearing since we have a reference.
|
|
if (dbref == -1 || bdberl_has_dbref(d, dbref))
|
|
{
|
|
// Mark the port as busy and then schedule the appropriate async operation
|
|
d->async_op = cmd;
|
|
d->async_dbref = dbref;
|
|
bdberl_general_tpool_run(&do_async_truncate, d, 0, &d->async_job);
|
|
|
|
// Let caller know that the operation is in progress
|
|
// Outbuf is: <<0:32>>
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
else
|
|
{
|
|
// Invalid dbref
|
|
RETURN_INT(ERROR_INVALID_DBREF, outbuf);
|
|
}
|
|
}
|
|
case CMD_REGISTER_LOGGER:
|
|
{
|
|
// If this port is not the current logger, make it so. Only one logger can be registered
|
|
// at a time.
|
|
if (G_LOG_PORT != d->port)
|
|
{
|
|
// Grab the write lock and update the global vars; also make sure to update BDB callbacks
|
|
// within the write lock to avoid race conditions.
|
|
WRITE_LOCK(G_LOG_RWLOCK);
|
|
|
|
G_LOG_PORT = d->port;
|
|
G_LOG_PID = driver_connected(d->port);
|
|
|
|
G_DB_ENV->set_msgcall(G_DB_ENV, &bdb_msgcall);
|
|
G_DB_ENV->set_errcall(G_DB_ENV, &bdb_errcall);
|
|
|
|
WRITE_UNLOCK(G_LOG_RWLOCK);
|
|
}
|
|
*outbuf = 0;
|
|
return 0;
|
|
}
|
|
case CMD_DB_STAT: /*FALLTHRU*/
|
|
case CMD_DB_STAT_PRINT: /*FALLTHRU*/
|
|
case CMD_ENV_STAT_PRINT: /*FALLTHRU*/
|
|
case CMD_LOCK_STAT: /*FALLTHRU*/
|
|
case CMD_LOCK_STAT_PRINT: /*FALLTHRU*/
|
|
case CMD_LOG_STAT: /*FALLTHRU*/
|
|
case CMD_LOG_STAT_PRINT: /*FALLTHRU*/
|
|
case CMD_MEMP_STAT: /*FALLTHRU*/
|
|
case CMD_MEMP_STAT_PRINT: /*FALLTHRU*/
|
|
case CMD_MUTEX_STAT: /*FALLTHRU*/
|
|
case CMD_MUTEX_STAT_PRINT: /*FALLTHRU*/
|
|
case CMD_TXN_STAT: /*FALLTHRU*/
|
|
case CMD_TXN_STAT_PRINT: /*FALLTHRU*/
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
return bdberl_stats_control(d, cmd, inbuf, inbuf_sz, outbuf, outbuf_sz);
|
|
}
|
|
case CMD_DATA_DIRS_INFO:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
|
|
do_sync_data_dirs_info(d);
|
|
|
|
// Let caller know that the operation is in progress
|
|
// Outbuf is: <<0:32>>
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_LOG_DIR_INFO:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
|
|
// Find the log dir or use DB_HOME - error if not present
|
|
const char *lg_dir = NULL;
|
|
int rc = G_DB_ENV->get_lg_dir(G_DB_ENV, &lg_dir);
|
|
if (0 == rc && NULL == lg_dir)
|
|
{
|
|
rc = G_DB_ENV->get_home(G_DB_ENV, &lg_dir);
|
|
}
|
|
// Send info if we can get a dir, otherwise return the error
|
|
if (0 == rc)
|
|
{
|
|
// send a dirinfo message - will send an error message on a NULL lg_dir
|
|
send_dir_info(d->port, d->port_owner, lg_dir);
|
|
}
|
|
else
|
|
{
|
|
bdberl_send_rc(d->port, d->port_owner, rc);
|
|
}
|
|
|
|
// Let caller know that the operation is in progress
|
|
// Outbuf is: <<0:32>>
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
case CMD_DRIVER_INFO:
|
|
{
|
|
FAIL_IF_ASYNC_PENDING(d, outbuf);
|
|
|
|
do_sync_driver_info(d);
|
|
|
|
// Let caller know that the operation is in progress
|
|
// Outbuf is: <<0:32>>
|
|
RETURN_INT(0, outbuf);
|
|
}
|
|
}
|
|
*outbuf = 0;
|
|
return 0;
|
|
}
|
|
|
|
|
|
// Check if an environment variable is set to a non-negative value (>=0)
|
|
// Returns 1 and sets the destination of val_ptr to the converted value
|
|
// Otherwise returns 0
|
|
static int check_non_neg_env(char *env, unsigned int *val_ptr)
|
|
{
|
|
char val_str[64];
|
|
size_t val_size = sizeof(val_str);
|
|
|
|
if (erl_drv_getenv(env, val_str, &val_size) >= 0)
|
|
{
|
|
errno = 0;
|
|
long long val = strtoll(val_str, NULL, 0);
|
|
if (0 == val && EINVAL == errno)
|
|
{
|
|
fprintf(stderr, "Ignoring \"%s\" value \"%s\" - invalid value\r\n", env, val_str);
|
|
return 0;
|
|
}
|
|
if (0 >= val || val > UINT_MAX)
|
|
{
|
|
fprintf(stderr, "Ignoring \"%s\" value \"%lld\" - out of range\r\n", env, val);
|
|
return 0;
|
|
}
|
|
unsigned int uival = (unsigned int) val;
|
|
DBG("Using \"%s\" value %u\r\n", env, uival);
|
|
*val_ptr = uival;
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
// Check if an environment variable is set to a positive value (>0)
|
|
// Returns 1 and sets the destination of val_ptr to the converted value
|
|
// Otherwise returns 0
|
|
|
|
static int check_pos_env(char *env, unsigned int *val_ptr)
|
|
{
|
|
unsigned int original_val = *val_ptr;
|
|
if (check_non_neg_env(env, val_ptr))
|
|
{
|
|
if (*val_ptr > 0)
|
|
{
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr, "Ignoring \"%s\" value \"%u\" - out of range\r\n", env, *val_ptr);
|
|
*val_ptr = original_val;
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
DB_ENV* bdberl_db_env(void)
|
|
{
|
|
assert(NULL != G_DB_ENV);
|
|
return G_DB_ENV;
|
|
}
|
|
|
|
DB* bdberl_lookup_dbref(int dbref)
|
|
{
|
|
assert(NULL != G_DATABASES);
|
|
assert(0 <= dbref);
|
|
assert(G_DATABASES_SIZE > dbref);
|
|
|
|
return G_DATABASES[dbref].db;
|
|
}
|
|
|
|
void bdberl_general_tpool_run(TPoolJobFunc main_fn, PortData* d, TPoolJobFunc cancel_fn,
|
|
TPoolJob** job_ptr)
|
|
{
|
|
d->async_pool = G_TPOOL_GENERAL;
|
|
bdberl_tpool_run(d->async_pool, main_fn, d, NULL, job_ptr);
|
|
}
|
|
|
|
void bdberl_txn_tpool_run(TPoolJobFunc main_fn, PortData* d, TPoolJobFunc cancel_fn,
|
|
TPoolJob** job_ptr)
|
|
{
|
|
d->async_pool = G_TPOOL_TXNS;
|
|
bdberl_tpool_run(d->async_pool, main_fn, d, NULL, job_ptr);
|
|
}
|
|
|
|
static int open_database(const char* name, DBTYPE type, unsigned int flags, PortData* data, int* dbref_res)
|
|
{
|
|
*dbref_res = -1;
|
|
|
|
READ_LOCK(G_DATABASES_RWLOCK);
|
|
|
|
// Look up the database by name in our hash table
|
|
Database* database = (Database*)hive_hash_get(G_DATABASES_NAMES, name);
|
|
if (database)
|
|
{
|
|
// Convert the database pointer into a dbref
|
|
int dbref = database - G_DATABASES;
|
|
|
|
// Great, the database was previously opened by someone else. Add it to our
|
|
// list of refs, and if it's a new addition also register this port with the
|
|
// Database structure in G_DATABASES
|
|
if (add_dbref(data, dbref))
|
|
{
|
|
// Need to update G_DATABASES -- grab the write lock
|
|
PROMOTE_READ_LOCK(G_DATABASES_RWLOCK);
|
|
|
|
// Add a reference to this port
|
|
add_portref(dbref, data->port);
|
|
|
|
// Release RW lock and return the ref
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
*dbref_res = dbref;
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
// Already in our list of opened databases -- unlock and return the reference
|
|
READ_UNLOCK(G_DATABASES_RWLOCK);
|
|
*dbref_res = dbref;
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// This database hasn't been opened yet -- grab a write lock
|
|
PROMOTE_READ_LOCK(G_DATABASES_RWLOCK);
|
|
|
|
// While waiting on the write lock, another thread could have slipped in and
|
|
// opened the database, so do one more check to see if the database is already
|
|
// open
|
|
database = (Database*)hive_hash_get(G_DATABASES_NAMES, name);
|
|
if (database)
|
|
{
|
|
// Database got created while we were waiting on the write lock, add a reference
|
|
// to our port and drop the lock ASAP
|
|
int dbref = database - G_DATABASES;
|
|
add_portref(dbref, data->port);
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
|
|
add_dbref(data, dbref);
|
|
*dbref_res = dbref;
|
|
return 0;
|
|
}
|
|
|
|
// Database hasn't been created while we were waiting on write lock, so
|
|
// create/open it
|
|
|
|
// Find the first available slot in G_DATABASES; the index will be our
|
|
// reference for database operations
|
|
int dbref = alloc_dbref();
|
|
if (dbref < 0)
|
|
{
|
|
// No more slots available
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
return ERROR_MAX_DBS;
|
|
}
|
|
|
|
// Create the DB handle
|
|
DB* db = NULL;
|
|
DBGCMD(data, "db_create(&db, %p, 0);", G_DB_ENV);
|
|
int rc = db_create(&db, G_DB_ENV, 0);
|
|
DBGCMD(data, "rc = %s (%d) db = %p", rc == 0 ? "ok" : bdberl_rc_to_atom_str(rc), rc, db);
|
|
if (rc != 0)
|
|
{
|
|
// Failure while creating the database handle -- drop our lock and return
|
|
// the code
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
return rc;
|
|
}
|
|
|
|
// If a custom page size has been specified, try to use it
|
|
if (G_PAGE_SIZE > 0)
|
|
{
|
|
if (db->set_pagesize(db, G_PAGE_SIZE) != 0)
|
|
{
|
|
bdb_errcall(G_DB_ENV, "", "Failed to set page size.");
|
|
}
|
|
}
|
|
|
|
// Attempt to open our database
|
|
DBGCMD(data, "db->open(%p, 0, '%s', 0, %x, %08x, 0);", db, name, type, flags);
|
|
rc = db->open(db, 0, name, 0, type, flags, 0);
|
|
DBGCMDRC(data, rc);
|
|
if (rc != 0)
|
|
{
|
|
// Failure while opening the database -- cleanup the handle, drop the lock
|
|
// and return
|
|
db->close(db, 0);
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
return rc;
|
|
}
|
|
|
|
// Database is open. Store all the data into the allocated ref
|
|
G_DATABASES[dbref].db = db;
|
|
G_DATABASES[dbref].name = strdup(name);
|
|
G_DATABASES[dbref].ports = zalloc(sizeof(PortList));
|
|
G_DATABASES[dbref].ports->port = data->port;
|
|
|
|
// Make entry in hash table of names
|
|
hive_hash_add(G_DATABASES_NAMES, G_DATABASES[dbref].name, &(G_DATABASES[dbref]));
|
|
|
|
// Drop the write lock
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
|
|
// Add the dbref to the port list
|
|
add_dbref(data, dbref);
|
|
*dbref_res = dbref;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static int close_database(int dbref, unsigned flags, PortData* data)
|
|
{
|
|
// Remove this database from our list
|
|
if (del_dbref(data, dbref))
|
|
{
|
|
// Something was actually deleted from our list -- now we need to disassociate the
|
|
// calling port with the global database structure.
|
|
WRITE_LOCK(G_DATABASES_RWLOCK);
|
|
|
|
assert(G_DATABASES[dbref].db != 0);
|
|
assert(G_DATABASES[dbref].ports != 0);
|
|
|
|
// Now disassociate this port from the database's port list
|
|
del_portref(dbref, data->port);
|
|
|
|
// Finally, if there are no other references to the database, close out
|
|
// the database completely
|
|
Database* database = &G_DATABASES[dbref];
|
|
if (database->ports == 0)
|
|
{
|
|
// Close out the BDB handle
|
|
DBGCMD(data, "database->db->close(%p, %08x) (for dbref %d)", database->db, flags, dbref);
|
|
int rc = database->db->close(database->db, flags);
|
|
DBGCMDRC(data, rc);
|
|
|
|
// Remove the entry from the names map
|
|
hive_hash_remove(G_DATABASES_NAMES, database->name);
|
|
free((char*)database->name);
|
|
|
|
// Zero out the whole record
|
|
memset(database, '\0', sizeof(Database));
|
|
}
|
|
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
return ERROR_NONE;
|
|
}
|
|
else
|
|
{
|
|
return ERROR_INVALID_DBREF;
|
|
}
|
|
}
|
|
|
|
// Abort the transaction and clean up
|
|
static void abort_txn(PortData* d)
|
|
{
|
|
if (d->txn)
|
|
{
|
|
DBGCMD(d, "d->txn->abort(%p)", d->txn);
|
|
int rc = d->txn->abort(d->txn);
|
|
DBGCMDRC(d, rc);
|
|
d->txn = NULL;
|
|
}
|
|
}
|
|
|
|
static int delete_database(const char* name)
|
|
{
|
|
// Go directly to a write lock on the global databases structure
|
|
WRITE_LOCK(G_DATABASES_RWLOCK);
|
|
|
|
// Make sure the database is not opened by anyone
|
|
if (hive_hash_get(G_DATABASES_NAMES, name))
|
|
{
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
return ERROR_DB_ACTIVE;
|
|
}
|
|
|
|
// Good, database doesn't seem to be open -- attempt the delete
|
|
DBG("Attempting to delete database: %s\r\n", name);
|
|
int rc = G_DB_ENV->dbremove(G_DB_ENV, 0, name, 0, DB_AUTO_COMMIT);
|
|
WRITE_UNLOCK(G_DATABASES_RWLOCK);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* Given a target system parameter, return the requested value
|
|
*/
|
|
static void get_info(int target, void* values, BinHelper* bh)
|
|
{
|
|
switch(target)
|
|
{
|
|
case SYSP_CACHESIZE_GET:
|
|
{
|
|
unsigned int gbytes = 0;
|
|
unsigned int bytes = 0;
|
|
int caches = 0;
|
|
int rc = G_DB_ENV->get_cachesize(G_DB_ENV, &gbytes, &bytes, &caches);
|
|
bin_helper_init(bh);
|
|
bin_helper_push_int32(bh, rc);
|
|
bin_helper_push_int32(bh, gbytes);
|
|
bin_helper_push_int32(bh, bytes);
|
|
bin_helper_push_int32(bh, caches);
|
|
break;
|
|
}
|
|
case SYSP_TXN_TIMEOUT_GET:
|
|
{
|
|
unsigned int timeout = 0;
|
|
int rc = G_DB_ENV->get_timeout(G_DB_ENV, &timeout, DB_SET_TXN_TIMEOUT);
|
|
bin_helper_init(bh);
|
|
bin_helper_push_int32(bh, rc);
|
|
bin_helper_push_int32(bh, timeout);
|
|
break;
|
|
}
|
|
case SYSP_DATA_DIR_GET:
|
|
{
|
|
const char** dirs = 0;
|
|
int rc = G_DB_ENV->get_data_dirs(G_DB_ENV, &dirs);
|
|
bin_helper_init(bh);
|
|
bin_helper_push_int32(bh, rc);
|
|
while (dirs && *dirs)
|
|
{
|
|
bin_helper_push_string(bh, *dirs);
|
|
dirs++;
|
|
}
|
|
break;
|
|
}
|
|
case SYSP_LOG_DIR_GET:
|
|
{
|
|
const char* dir = 0;
|
|
// Get the log dir - according to BDB docs, if not set
|
|
// the DB_HOME is used.
|
|
int rc = G_DB_ENV->get_lg_dir(G_DB_ENV, &dir);
|
|
if (NULL == dir)
|
|
{
|
|
if (0 != G_DB_ENV->get_home(G_DB_ENV, &dir))
|
|
{
|
|
dir = NULL;
|
|
}
|
|
}
|
|
bin_helper_init(bh);
|
|
bin_helper_push_int32(bh, rc);
|
|
bin_helper_push_string(bh, dir); // Will convert NULL pointer to "<null>"
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void bdberl_async_cleanup(PortData* d)
|
|
{
|
|
// Release the port for another operation
|
|
d->work_buffer_offset = 0;
|
|
erl_drv_mutex_lock(d->port_lock);
|
|
d->async_dbref = -1;
|
|
d->async_pool = 0;
|
|
d->async_job = 0;
|
|
d->async_op = CMD_NONE;
|
|
erl_drv_mutex_unlock(d->port_lock);
|
|
}
|
|
|
|
|
|
// Convert an rc from BDB into a string suitable for driver_mk_atom
|
|
// returns NULL on no match
|
|
char *bdberl_rc_to_atom_str(int rc)
|
|
{
|
|
char *error = erl_errno_id(rc);
|
|
if (NULL != error && strcmp("unknown", error) != 0)
|
|
{
|
|
return error;
|
|
}
|
|
else
|
|
{
|
|
switch (rc)
|
|
{
|
|
// bdberl driver errors
|
|
case ERROR_MAX_DBS: return "max_dbs";
|
|
case ERROR_ASYNC_PENDING: return "async_pending";
|
|
case ERROR_INVALID_DBREF: return "invalid_db";
|
|
case ERROR_TXN_OPEN: return "transaction_open";
|
|
case ERROR_NO_TXN: return "no_txn";
|
|
case ERROR_CURSOR_OPEN: return "cursor_open";
|
|
case ERROR_NO_CURSOR: return "no_cursor";
|
|
case ERROR_DB_ACTIVE: return "db_active";
|
|
case ERROR_INVALID_CMD: return "invalid_cmd";
|
|
case ERROR_INVALID_DB_TYPE: return "invalid_db_type";
|
|
case ERROR_INVALID_VALUE: return "invalid_value";
|
|
// bonafide BDB errors
|
|
case DB_BUFFER_SMALL: return "buffer_small";
|
|
case DB_DONOTINDEX: return "do_not_index";
|
|
case DB_FOREIGN_CONFLICT: return "foreign_conflict";
|
|
case DB_KEYEMPTY: return "key_empty";
|
|
case DB_KEYEXIST: return "key_exist";
|
|
case DB_LOCK_DEADLOCK: return "deadlock";
|
|
case DB_LOCK_NOTGRANTED: return "lock_not_granted";
|
|
case DB_LOG_BUFFER_FULL: return "log_buffer_full";
|
|
case DB_NOTFOUND: return "not_found";
|
|
case DB_OLD_VERSION: return "old_version";
|
|
case DB_PAGE_NOTFOUND: return "page_not_found";
|
|
case DB_RUNRECOVERY: return "run_recovery";
|
|
case DB_VERIFY_BAD: return "verify_bad";
|
|
case DB_VERSION_MISMATCH: return "version_mismatch";
|
|
default: return NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// Send a {dirinfo, Path, FsId, MbyteAvail} message to pid given.
|
|
// Send an {errno, Reason} on failure
|
|
// returns 0 on success, errno on failure
|
|
static int send_dir_info(ErlDrvPort port, ErlDrvTermData pid, const char *path)
|
|
{
|
|
struct statvfs svfs;
|
|
int rc;
|
|
|
|
if (NULL == path)
|
|
{
|
|
rc = EINVAL;
|
|
}
|
|
else if (0 != statvfs(path, &svfs))
|
|
{
|
|
rc = errno;
|
|
}
|
|
else
|
|
{
|
|
rc = 0;
|
|
}
|
|
|
|
if (0 != rc)
|
|
{
|
|
bdberl_send_rc(port, pid, rc);
|
|
}
|
|
else
|
|
{
|
|
fsblkcnt_t blocks_per_mbyte = 1024 * 1024 / svfs.f_frsize;
|
|
assert(blocks_per_mbyte > 0);
|
|
unsigned int mbyte_avail = (unsigned int) (svfs.f_bavail / blocks_per_mbyte);
|
|
int path_len = strlen(path);
|
|
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("dirinfo"),
|
|
ERL_DRV_STRING, (ErlDrvTermData) path, path_len,
|
|
// send fsid as a binary as will only be used
|
|
// to compare which physical filesystem is on
|
|
// and the definintion varies between platforms.
|
|
ERL_DRV_BUF2BINARY, (ErlDrvTermData) &svfs.f_fsid,
|
|
sizeof(svfs.f_fsid),
|
|
ERL_DRV_UINT, mbyte_avail,
|
|
ERL_DRV_TUPLE, 4};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
|
|
void bdberl_send_rc(ErlDrvPort port, ErlDrvTermData pid, int rc)
|
|
{
|
|
// TODO: May need to tag the messages a bit more explicitly so that if another async
|
|
// job runs to completion before the message gets delivered we don't mis-interpret this
|
|
// response code.
|
|
if (rc == 0)
|
|
{
|
|
ErlDrvTermData response[] = {ERL_DRV_ATOM, driver_mk_atom("ok")};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
else
|
|
{
|
|
// See if this is a standard errno that we have an erlang code for
|
|
char *error = bdberl_rc_to_atom_str(rc);
|
|
if (NULL != error)
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("error"),
|
|
ERL_DRV_ATOM, driver_mk_atom(error),
|
|
ERL_DRV_TUPLE, 2};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
else
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("error"),
|
|
ERL_DRV_ATOM, driver_mk_atom("unknown"),
|
|
ERL_DRV_INT, rc,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_TUPLE, 2};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
}
|
|
}
|
|
|
|
void bdberl_async_cleanup_and_send_rc(PortData* d, int rc)
|
|
{
|
|
// Save the port and pid references -- we need copies independent from the PortData
|
|
// structure. Once we release the port_lock after clearing the cmd, it's possible that
|
|
// the port could go away without waiting on us to finish. This is acceptable, but we need
|
|
// to be certain that there is no overlap of data between the two threads. driver_send_term
|
|
// is safe to use from a thread, even if the port you're sending from has already expired.
|
|
ErlDrvPort port = d->port;
|
|
ErlDrvTermData pid = d->port_owner;
|
|
|
|
bdberl_async_cleanup(d);
|
|
bdberl_send_rc(port, pid, rc);
|
|
}
|
|
|
|
static void async_cleanup_and_send_kv(PortData* d, int rc, DBT* key, DBT* value)
|
|
{
|
|
// Save the port and pid references -- we need copies independent from the PortData
|
|
// structure. Once we release the port_lock after clearing the cmd, it's possible that
|
|
// the port could go away without waiting on us to finish. This is acceptable, but we need
|
|
// to be certain that there is no overlap of data between the two threads. driver_send_term
|
|
// is safe to use from a thread, even if the port you're sending from has already expired.
|
|
ErlDrvPort port = d->port;
|
|
ErlDrvTermData pid = d->port_owner;
|
|
|
|
bdberl_async_cleanup(d);
|
|
|
|
// Notify port of result
|
|
if (rc == 0)
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("ok"),
|
|
ERL_DRV_BUF2BINARY, (ErlDrvTermData)key->data, (ErlDrvUInt)key->size,
|
|
ERL_DRV_BUF2BINARY, (ErlDrvTermData)value->data, (ErlDrvUInt)value->size,
|
|
ERL_DRV_TUPLE, 3};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
else if (rc == DB_NOTFOUND)
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("not_found") };
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
else
|
|
{
|
|
// See if this is a standard errno that we have an erlang code for
|
|
char *error = bdberl_rc_to_atom_str(rc);
|
|
if (NULL != error)
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("error"),
|
|
ERL_DRV_ATOM, driver_mk_atom(error),
|
|
ERL_DRV_TUPLE, 2};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
else
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("error"),
|
|
ERL_DRV_ATOM, driver_mk_atom("unknown"),
|
|
ERL_DRV_INT, rc,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_TUPLE, 2};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void do_async_put(void* arg)
|
|
{
|
|
// Payload is: <<DbRef:32, Flags:32, KeyLen:32, Key:KeyLen, ValLen:32, Val:ValLen>>
|
|
PortData* d = (PortData*)arg;
|
|
|
|
// Get the database reference and flags from the payload
|
|
int dbref = UNPACK_INT(d->work_buffer, 0);
|
|
DB* db = bdberl_lookup_dbref(dbref);
|
|
unsigned int flags = UNPACK_INT(d->work_buffer, 4);
|
|
|
|
// Setup DBTs
|
|
DBT key;
|
|
DBT value;
|
|
memset(&key, '\0', sizeof(DBT));
|
|
memset(&value, '\0', sizeof(DBT));
|
|
|
|
// Parse payload into DBTs
|
|
key.size = UNPACK_INT(d->work_buffer, 8);
|
|
key.data = UNPACK_BLOB(d->work_buffer, 12);
|
|
value.size = UNPACK_INT(d->work_buffer, 12 + key.size);
|
|
value.data = UNPACK_BLOB(d->work_buffer, 12 + key.size + 4);
|
|
|
|
// Check CRC in value payload - first 4 bytes are CRC of rest of bytes
|
|
assert(value.size >= 4);
|
|
uint32_t calc_crc32 = bdberl_crc32(value.data+4, value.size-4);
|
|
uint32_t buf_crc32 = *(uint32_t*) value.data;
|
|
|
|
int rc;
|
|
if (calc_crc32 != buf_crc32)
|
|
{
|
|
DBGCMD(d, "CRC-32 error on put data - buffer %08X calculated %08X.", buf_crc32, calc_crc32);
|
|
rc = ERROR_INVALID_VALUE;
|
|
}
|
|
else
|
|
{
|
|
// Execute the actual put. All databases are opened with AUTO_COMMIT, so if msg->port->txn
|
|
// is NULL, the put will still be atomic
|
|
DBGCMD(d, "db->put(%p, %p, %p, %p, %08X) key=%p(%d) value=%p(%d)",
|
|
db, d->txn, &key, &value, flags, key.data, key.size, value.data, value.size);
|
|
rc = db->put(db, d->txn, &key, &value, flags);
|
|
DBGCMDRC(d, rc);
|
|
}
|
|
|
|
// If any error occurs while we have a txn action, abort it
|
|
if (d->txn && rc)
|
|
{
|
|
abort_txn(d);
|
|
}
|
|
else if (d->txn && d->async_op == CMD_PUT_COMMIT)
|
|
{
|
|
// Put needs to be followed by a commit -- saves us another pass through the driver and
|
|
// threadpool queues
|
|
rc = d->txn->commit(d->txn, 0);
|
|
|
|
// Regardless of the txn commit outcome, we still need to invalidate the transaction
|
|
d->txn = 0;
|
|
}
|
|
|
|
bdberl_async_cleanup_and_send_rc(d, rc);
|
|
}
|
|
|
|
static void do_async_get(void* arg)
|
|
{
|
|
// Payload is: << DbRef:32, Flags:32, KeyLen:32, Key:KeyLen >>
|
|
PortData* d = (PortData*)arg;
|
|
|
|
// Get the database object, using the provided ref
|
|
int dbref = UNPACK_INT(d->work_buffer, 0);
|
|
DB* db = bdberl_lookup_dbref(dbref);
|
|
|
|
// Extract operation flags
|
|
unsigned flags = UNPACK_INT(d->work_buffer, 4);
|
|
|
|
// Setup DBTs
|
|
DBT key;
|
|
DBT value;
|
|
memset(&key, '\0', sizeof(DBT));
|
|
memset(&value, '\0', sizeof(DBT));
|
|
|
|
// Parse payload into DBT
|
|
key.size = UNPACK_INT(d->work_buffer, 8);
|
|
key.data = UNPACK_BLOB(d->work_buffer, 12);
|
|
|
|
// Allocate a buffer for the output value
|
|
#ifdef USE_DRIVER_REALLOC
|
|
value.ulen = 4096;
|
|
value.data = driver_alloc(value.ulen);
|
|
value.flags = DB_DBT_USERMEM;
|
|
#else
|
|
value.flags = DB_DBT_MALLOC;
|
|
#endif
|
|
|
|
int rc = db->get(db, d->txn, &key, &value, flags);
|
|
#ifdef USE_DRIVER_REALLOC
|
|
while (rc == DB_BUFFER_SMALL)
|
|
{
|
|
// Grow our value buffer
|
|
value.ulen = value.size;
|
|
value.size = 0;
|
|
value.data = driver_realloc(value.data, value.ulen);
|
|
rc = db->get(db, d->txn, &key, &value, flags);
|
|
}
|
|
#endif
|
|
// Check CRC - first 4 bytes are CRC of rest of bytes
|
|
if (0 == rc)
|
|
{
|
|
assert(value.size >= 4);
|
|
uint32_t calc_crc32 = bdberl_crc32(value.data+4, value.size-4);
|
|
uint32_t buf_crc32 = *(uint32_t*) value.data;
|
|
|
|
if (calc_crc32 != buf_crc32)
|
|
{
|
|
DBGCMD(d, "CRC-32 error on get data - buffer %08X calculated %08X.",
|
|
buf_crc32, calc_crc32);
|
|
rc = ERROR_INVALID_VALUE;
|
|
}
|
|
}
|
|
|
|
// Cleanup transaction as necessary
|
|
if (rc && rc != DB_NOTFOUND && d->txn)
|
|
{
|
|
d->txn->abort(d->txn);
|
|
d->txn = 0;
|
|
}
|
|
|
|
async_cleanup_and_send_kv(d, rc, &key, &value);
|
|
|
|
// Finally, clean up value buffer (driver_send_term made a copy)
|
|
#ifdef USE_DRIVER_REALLOC
|
|
driver_free(value.data);
|
|
#else
|
|
if (value.data)
|
|
free(value.data);
|
|
#endif
|
|
}
|
|
|
|
static void do_async_txnop(void* arg)
|
|
{
|
|
PortData* d = (PortData*)arg;
|
|
|
|
// Execute the actual begin/commit/abort
|
|
int rc = 0;
|
|
if (d->async_op == CMD_TXN_BEGIN)
|
|
{
|
|
DBGCMD(d, "G_DB_ENV->txn_begin(%p, 0, %p, %08X)", G_DB_ENV, d->txn, d->async_flags);
|
|
rc = G_DB_ENV->txn_begin(G_DB_ENV, 0, &(d->txn), d->async_flags);
|
|
DBGCMD(d, "rc = %s (%d) d->txn = %p", rc == 0 ? "ok" : bdberl_rc_to_atom_str(rc), rc, d->txn);
|
|
|
|
}
|
|
else if (d->async_op == CMD_TXN_COMMIT)
|
|
{
|
|
assert(NULL != d->txn);
|
|
DBGCMD(d, "d->txn->txn_commit(%p, %08X)", d->txn, d->async_flags);
|
|
rc = d->txn->commit(d->txn, d->async_flags);
|
|
DBGCMDRC(d, rc);
|
|
d->txn = 0;
|
|
}
|
|
else
|
|
{
|
|
assert(d->async_op == CMD_TXN_ABORT);
|
|
abort_txn(d);
|
|
}
|
|
|
|
bdberl_async_cleanup_and_send_rc(d, rc);
|
|
}
|
|
|
|
|
|
static void do_async_cursor_get(void* arg)
|
|
{
|
|
// Payload is: << DbRef:32, Flags:32, KeyLen:32, Key:KeyLen >>
|
|
PortData* d = (PortData*)arg;
|
|
assert(NULL != d->cursor);
|
|
|
|
// Setup DBTs
|
|
DBT key;
|
|
DBT value;
|
|
memset(&key, '\0', sizeof(DBT));
|
|
memset(&value, '\0', sizeof(DBT));
|
|
|
|
// Determine what type of cursor get to perform
|
|
int flags = 0;
|
|
switch (d->async_op)
|
|
{
|
|
case CMD_CURSOR_NEXT:
|
|
flags = DB_NEXT; break;
|
|
case CMD_CURSOR_PREV:
|
|
flags = DB_PREV; break;
|
|
default:
|
|
flags = DB_CURRENT;
|
|
}
|
|
|
|
// Execute the operation
|
|
DBGCMD(d, "d->cursor->get(%p, %p, %p, %08X);", d->cursor, &key, &value, flags);
|
|
int rc = d->cursor->get(d->cursor, &key, &value, flags);
|
|
DBGCMDRC(d, rc);
|
|
|
|
// Check CRC - first 4 bytes are CRC of rest of bytes
|
|
if (0 == rc)
|
|
{
|
|
assert(value.size >= 4);
|
|
uint32_t calc_crc32 = bdberl_crc32(value.data+4, value.size-4);
|
|
uint32_t file_crc32 = *(uint32_t*) value.data;
|
|
|
|
if (calc_crc32 != file_crc32)
|
|
{
|
|
rc = ERROR_INVALID_VALUE;
|
|
}
|
|
}
|
|
|
|
// Cleanup as necessary; any sort of failure means we need to close the cursor and abort
|
|
// the transaction
|
|
if (rc && rc != DB_NOTFOUND)
|
|
{
|
|
DBG("cursor flags=%d rc=%d\n", flags, rc);
|
|
|
|
d->cursor->close(d->cursor);
|
|
d->cursor = 0;
|
|
abort_txn(d);
|
|
}
|
|
|
|
async_cleanup_and_send_kv(d, rc, &key, &value);
|
|
}
|
|
|
|
static void do_async_truncate(void* arg)
|
|
{
|
|
// Payload is: <<DbRef:32>>
|
|
PortData* d = (PortData*)arg;
|
|
|
|
// Get the database reference and flags from the payload
|
|
int rc = 0;
|
|
|
|
if (d->async_dbref == -1)
|
|
{
|
|
DBG("Truncating all open databases...\r\n");
|
|
|
|
// Iterate over the whole database list skipping null entries
|
|
int i = 0; // I hate C
|
|
for ( ; i < G_DATABASES_SIZE; ++i)
|
|
{
|
|
Database* database = &G_DATABASES[i];
|
|
if (database != NULL && database->db != 0)
|
|
{
|
|
DB* db = database->db;
|
|
u_int32_t count = 0;
|
|
|
|
DBGCMD(d, "db->truncate(%p, %p, %p, 0) dbref=%d", db, d->txn, &count, i);
|
|
rc = db->truncate(db, d->txn, &count, 0);
|
|
DBGCMD(d, "rc = %s (%d) count=%d",
|
|
rc == 0 ? "ok" : bdberl_rc_to_atom_str(rc), rc, count);
|
|
|
|
if (rc != 0)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
DB* db = G_DATABASES[d->async_dbref].db;
|
|
u_int32_t count = 0;
|
|
DBGCMD(d, "db->truncate(%p, %p, %p, 0) dbref=%d", db, d->txn, &count, d->async_dbref);
|
|
rc = db->truncate(db, d->txn, &count, 0);
|
|
DBGCMD(d, "rc = %s (%d) count=%d", rc == 0 ? "ok" : bdberl_rc_to_atom_str(rc),
|
|
rc, count);
|
|
}
|
|
|
|
// If any error occurs while we have a txn action, abort it
|
|
if (d->txn && rc)
|
|
{
|
|
abort_txn(d);
|
|
}
|
|
|
|
bdberl_async_cleanup_and_send_rc(d, rc);
|
|
}
|
|
|
|
static void do_sync_data_dirs_info(PortData *d)
|
|
{
|
|
// Get DB_HOME and find the real path
|
|
const char *db_home = NULL;
|
|
const char *data_dir = NULL;
|
|
const char **data_dirs = NULL;
|
|
char db_home_realpath[PATH_MAX+1];
|
|
char data_dir_realpath[PATH_MAX+1];
|
|
int got_db_home = 0;
|
|
|
|
// Lookup the environment and add it if not explicitly included in the data_dirs
|
|
int rc = G_DB_ENV->get_home(G_DB_ENV, &db_home);
|
|
if (rc != 0 || NULL == db_home)
|
|
{
|
|
// If no db_home we'll have to rely on whatever the global environment is configured with
|
|
got_db_home = 1;
|
|
}
|
|
else
|
|
{
|
|
if (NULL == realpath(db_home, db_home_realpath))
|
|
rc = errno;
|
|
}
|
|
|
|
// Get the data first
|
|
rc = G_DB_ENV->get_data_dirs(G_DB_ENV, &data_dirs);
|
|
int i;
|
|
for (i = 0; 0 == rc && NULL != data_dirs && NULL != data_dirs[i]; i++)
|
|
{
|
|
data_dir = data_dirs[i];
|
|
|
|
if (!got_db_home)
|
|
{
|
|
// Get the real path of the data dir
|
|
if (NULL == realpath(data_dir, data_dir_realpath))
|
|
{
|
|
rc = errno;
|
|
}
|
|
else
|
|
{
|
|
// Set got_db_home if it matches
|
|
if (0 == strcmp(data_dir_realpath, db_home_realpath))
|
|
{
|
|
got_db_home = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (0 == rc)
|
|
{
|
|
rc = send_dir_info(d->port, d->port_owner, data_dir);
|
|
}
|
|
}
|
|
|
|
// BDB always searches the environment home too so add it to the list
|
|
if (!got_db_home && rc == 0)
|
|
{
|
|
rc = send_dir_info(d->port, d->port_owner, db_home);
|
|
}
|
|
|
|
// Send the return code - will termiante the receive loop in bdberl.erl
|
|
bdberl_send_rc(d->port, d->port_owner, rc);
|
|
}
|
|
|
|
|
|
// Send bdberl specific driver info
|
|
static void do_sync_driver_info(PortData *d)
|
|
{
|
|
unsigned int txn_pending;
|
|
unsigned int txn_active;
|
|
unsigned int general_pending;
|
|
unsigned int general_active;
|
|
bdberl_tpool_job_count(G_TPOOL_GENERAL, &general_pending, &general_active);
|
|
bdberl_tpool_job_count(G_TPOOL_TXNS, &txn_pending, &txn_active);
|
|
|
|
ErlDrvPort port = d->port;
|
|
ErlDrvTermData pid = d->port_owner;
|
|
ErlDrvTermData response[] = {
|
|
ERL_DRV_ATOM, driver_mk_atom("ok"),
|
|
// Start of list
|
|
ERL_DRV_ATOM, driver_mk_atom("databases_size"),
|
|
ERL_DRV_UINT, G_DATABASES_SIZE,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("deadlock_interval"),
|
|
ERL_DRV_UINT, G_DEADLOCK_CHECK_INTERVAL,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("trickle_interval"),
|
|
ERL_DRV_UINT, G_TRICKLE_INTERVAL,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("trickle_percentage"),
|
|
ERL_DRV_UINT, G_TRICKLE_PERCENTAGE,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("checkpoint_interval"),
|
|
ERL_DRV_UINT, G_CHECKPOINT_INTERVAL,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("num_general_threads"),
|
|
ERL_DRV_UINT, G_NUM_GENERAL_THREADS,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("num_txn_threads"),
|
|
ERL_DRV_UINT, G_NUM_TXN_THREADS,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("general_jobs_pending"),
|
|
ERL_DRV_UINT, general_pending,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("general_jobs_active"),
|
|
ERL_DRV_UINT, general_active,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("txn_jobs_pending"),
|
|
ERL_DRV_UINT, txn_pending,
|
|
ERL_DRV_TUPLE, 2,
|
|
ERL_DRV_ATOM, driver_mk_atom("txn_jobs_active"),
|
|
ERL_DRV_UINT, txn_active,
|
|
ERL_DRV_TUPLE, 2,
|
|
// End of list
|
|
ERL_DRV_NIL,
|
|
ERL_DRV_LIST, 11+1,
|
|
ERL_DRV_TUPLE, 2
|
|
};
|
|
driver_send_term(port, pid, response, sizeof(response) / sizeof(response[0]));
|
|
}
|
|
|
|
|
|
static void* zalloc(unsigned int size)
|
|
{
|
|
void* res = driver_alloc(size);
|
|
memset(res, '\0', size);
|
|
return res;
|
|
}
|
|
|
|
#define zfree(p) driver_free(p)
|
|
|
|
static int add_portref(int dbref, ErlDrvPort port)
|
|
{
|
|
PortList* current = G_DATABASES[dbref].ports;
|
|
if (current)
|
|
{
|
|
PortList* last = 0;
|
|
do
|
|
{
|
|
// If the current item matches our port, bail -- nothing to do here
|
|
if (current->port == port)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
last = current;
|
|
current = current->next;
|
|
} while (current != 0);
|
|
|
|
// At the end of the list -- allocate a new entry for this port
|
|
current = (PortList*)zalloc(sizeof(PortList));
|
|
current->port = port;
|
|
last->next = current;
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
// Current was initially NULL, so alloc the first one and add it.
|
|
current = zalloc(sizeof(PortList));
|
|
current->port = port;
|
|
G_DATABASES[dbref].ports = current;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
static int del_portref(int dbref, ErlDrvPort port)
|
|
{
|
|
PortList* current = G_DATABASES[dbref].ports;
|
|
PortList* last = 0;
|
|
assert(NULL != current);
|
|
while (current)
|
|
{
|
|
if (current->port == port)
|
|
{
|
|
// Found our match -- look back and connect the last item to our next
|
|
if (last)
|
|
{
|
|
last->next = current->next;
|
|
}
|
|
else
|
|
{
|
|
G_DATABASES[dbref].ports = current->next;
|
|
}
|
|
|
|
// Delete this entry
|
|
zfree(current);
|
|
return 1;
|
|
}
|
|
|
|
last = current;
|
|
current = current->next;
|
|
}
|
|
|
|
// Didn't delete anything
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Add a db reference to a port's DbRefList. Returns 1 if added; 0 if already present
|
|
*/
|
|
static int add_dbref(PortData* data, int dbref)
|
|
{
|
|
DbRefList* current = data->dbrefs;
|
|
if (current)
|
|
{
|
|
DbRefList* last = 0;
|
|
do
|
|
{
|
|
if (current->dbref == dbref)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
last = current;
|
|
current = current->next;
|
|
} while (current != 0);
|
|
|
|
// At the end of the list -- allocate a new entry
|
|
current = zalloc(sizeof(DbRefList));
|
|
current->dbref = dbref;
|
|
last->next = current;
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
// Current was initially NULL, so alloc the first one
|
|
current = zalloc(sizeof(DbRefList));
|
|
current->dbref = dbref;
|
|
data->dbrefs = current;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Delete a db reference from a port's DbRefList. Returns 1 if deleted; 0 if not
|
|
*/
|
|
static int del_dbref(PortData* data, int dbref)
|
|
{
|
|
DbRefList* current = data->dbrefs;
|
|
DbRefList* last = 0;
|
|
assert(NULL != current);
|
|
|
|
while (current)
|
|
{
|
|
if (current->dbref == dbref)
|
|
{
|
|
// Found our match -- look back and connect the last item to our next
|
|
if (last)
|
|
{
|
|
last->next = current->next;
|
|
}
|
|
else
|
|
{
|
|
data->dbrefs = current->next;
|
|
}
|
|
|
|
// Delete this entry
|
|
zfree(current);
|
|
return 1;
|
|
}
|
|
|
|
last = current;
|
|
current = current->next;
|
|
}
|
|
|
|
// Didn't delete anything
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Validate that a provided dbref is currently opened by a port. Return 1 if true; 0 if false.
|
|
*/
|
|
int bdberl_has_dbref(PortData* data, int dbref)
|
|
{
|
|
DbRefList* current = data->dbrefs;
|
|
while (current)
|
|
{
|
|
if (current->dbref == dbref)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
current = current->next;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Allocate a Database structure; find first available slot in G_DATABASES and return the
|
|
* index of it. If no free slots are available, return -1
|
|
*/
|
|
static int alloc_dbref()
|
|
{
|
|
int i;
|
|
for (i = 0; i < G_DATABASES_SIZE; i++)
|
|
{
|
|
if (G_DATABASES[i].db == 0)
|
|
{
|
|
return i;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
|
|
/**
|
|
* Utility thread sleep - returns true if being signalled to exit
|
|
* otherwise false if timeout exceeded.
|
|
*/
|
|
int util_thread_usleep(unsigned int usecs)
|
|
{
|
|
fd_set fds;
|
|
struct timeval sleep_until;
|
|
struct timeval sleep_for;
|
|
struct timeval now;
|
|
struct timeval tv;
|
|
int done;
|
|
int nfds = (G_BDBERL_PIPE[0] > G_BDBERL_PIPE[1] ? G_BDBERL_PIPE[0] : G_BDBERL_PIPE[1]) + 1;
|
|
|
|
memset(&sleep_for, 0, sizeof(sleep_for));
|
|
sleep_for.tv_sec = usecs / 1000000;
|
|
sleep_for.tv_usec = usecs % 1000000;
|
|
|
|
gettimeofday(&now, NULL);
|
|
timeradd(&now, &sleep_for, &sleep_until);
|
|
|
|
do
|
|
{
|
|
FD_ZERO(&fds);
|
|
FD_SET(G_BDBERL_PIPE[0], &fds); // read fd of pipe
|
|
|
|
// Check if we have slept long enough
|
|
gettimeofday(&now, NULL);
|
|
if (timercmp(&now, &sleep_until, >))
|
|
{
|
|
done = 1;
|
|
}
|
|
else // take a nap
|
|
{
|
|
// work out the remaining time to sleep on the fd for - make sure that this time
|
|
// is less than or equal to the original sleep time requested, just in
|
|
// case the system time is being adjusted. If the adjustment would result
|
|
// in a longer wait then cap it at the sleep_for time.
|
|
timersub(&sleep_until, &now, &tv);
|
|
if (timercmp(&tv, &sleep_for, >))
|
|
{
|
|
memcpy(&tv, &sleep_for, sizeof(tv));
|
|
}
|
|
|
|
done = 1;
|
|
if (-1 == select(nfds, &fds, NULL, NULL, &tv))
|
|
{
|
|
if (EINTR == errno) // a signal woke up select, back to sleep for us
|
|
{
|
|
done = 0;
|
|
}
|
|
// any other signals can return to the caller to fail safe as it
|
|
// doesn't matter if the util threads get woken up more often
|
|
}
|
|
else if (FD_ISSET(G_BDBERL_PIPE[0], &fds))
|
|
{
|
|
done = 1;
|
|
}
|
|
}
|
|
} while (!done);
|
|
|
|
return FD_ISSET(G_BDBERL_PIPE[0], &fds);
|
|
}
|
|
|
|
/**
|
|
* Thread function that runs the deadlock checker periodically
|
|
*/
|
|
static void* deadlock_check(void* arg)
|
|
{
|
|
while(G_DEADLOCK_CHECK_ACTIVE)
|
|
{
|
|
// Run the lock detection
|
|
int count = 0;
|
|
int rc = G_DB_ENV->lock_detect(G_DB_ENV, 0, DB_LOCK_DEFAULT, &count);
|
|
if (0 != rc)
|
|
{
|
|
DBG("lock_detect returned %s(%d)\n", db_strerror(rc), rc);
|
|
}
|
|
if (count > 0)
|
|
{
|
|
DBG("Rejected deadlocks: %d\r\n", count);
|
|
}
|
|
|
|
if (G_DEADLOCK_CHECK_INTERVAL > 0)
|
|
{
|
|
util_thread_usleep(G_DEADLOCK_CHECK_INTERVAL * 1000);
|
|
}
|
|
}
|
|
|
|
DBG("Deadlock checker exiting.\r\n");
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Thread function that does trickle writes or checkpointing at fixed intervals.
|
|
*/
|
|
static void* checkpointer(void* arg)
|
|
{
|
|
time_t last_checkpoint_time = time(0);
|
|
time_t last_trickle_time = time(0);
|
|
|
|
while (G_CHECKPOINT_ACTIVE)
|
|
{
|
|
time_t now = time(0);
|
|
if (now - last_checkpoint_time > G_CHECKPOINT_INTERVAL)
|
|
{
|
|
// Time to checkpoint and cleanup log files
|
|
int checkpoint_rc = G_DB_ENV->txn_checkpoint(G_DB_ENV, 0, 0, 0);
|
|
|
|
// Mark the time before starting log_archive so we can know how long it took
|
|
time_t log_now = time(0);
|
|
int log_rc = G_DB_ENV->log_archive(G_DB_ENV, NULL, DB_ARCH_REMOVE);
|
|
time_t finish_now = time(0);
|
|
|
|
// Bundle up the results and elapsed time into a message for the logger
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("bdb_checkpoint_stats"),
|
|
ERL_DRV_UINT, log_now - now, /* Elapsed seconds for checkpoint */
|
|
ERL_DRV_UINT, finish_now - log_now, /* Elapsed seconds for log_archive */
|
|
ERL_DRV_INT, checkpoint_rc, /* Return code of checkpoint */
|
|
ERL_DRV_INT, log_rc, /* Return code of log_archive */
|
|
ERL_DRV_TUPLE, 5};
|
|
send_log_message(response, sizeof(response));
|
|
|
|
// Note the time of this checkpoint completion
|
|
last_checkpoint_time = finish_now;
|
|
}
|
|
else if (now - last_trickle_time > G_TRICKLE_INTERVAL)
|
|
{
|
|
// Time to run the trickle operation again
|
|
int pages_wrote = 0;
|
|
int rc = G_DB_ENV->memp_trickle(G_DB_ENV, G_TRICKLE_PERCENTAGE, &pages_wrote);
|
|
time_t finish_now = time(0);
|
|
|
|
// Bundle up the results and elapsed time into a message for the logger
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("bdb_trickle_stats"),
|
|
ERL_DRV_UINT, finish_now - now, /* Elapsed seconds for trickle */
|
|
ERL_DRV_UINT, pages_wrote, /* Number of pages flushed */
|
|
ERL_DRV_INT, rc, /* Return code of checkpoint */
|
|
ERL_DRV_TUPLE, 4};
|
|
send_log_message(response, sizeof(response));
|
|
|
|
// Note the time of this trickle completion
|
|
last_trickle_time = finish_now;
|
|
}
|
|
|
|
// Always sleep for one second
|
|
util_thread_usleep(1000000);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void bdb_errcall(const DB_ENV* dbenv, const char* errpfx, const char* msg)
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("bdb_error_log"),
|
|
ERL_DRV_STRING, (ErlDrvTermData)msg, (ErlDrvUInt)strlen(msg),
|
|
ERL_DRV_TUPLE, 2};
|
|
send_log_message(response, sizeof(response));
|
|
}
|
|
|
|
static void bdb_msgcall(const DB_ENV* dbenv, const char* msg)
|
|
{
|
|
ErlDrvTermData response[] = { ERL_DRV_ATOM, driver_mk_atom("bdb_info_log"),
|
|
ERL_DRV_STRING, (ErlDrvTermData)msg, (ErlDrvUInt)strlen(msg),
|
|
ERL_DRV_TUPLE, 2};
|
|
send_log_message(response, sizeof(response));
|
|
}
|
|
|
|
static void send_log_message(ErlDrvTermData* msg, int elements)
|
|
{
|
|
if (G_LOG_PORT)
|
|
{
|
|
READ_LOCK(G_LOG_RWLOCK);
|
|
driver_send_term(G_LOG_PORT, G_LOG_PID, msg, elements / sizeof(msg[0]));
|
|
READ_UNLOCK(G_LOG_RWLOCK);
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
#if 1
|
|
static void DBGCMD(PortData *d, const char *fmt, ...)
|
|
{
|
|
char buf[1024];
|
|
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
vsnprintf(buf, sizeof(buf), fmt, ap);
|
|
va_end(ap);
|
|
|
|
(void)fprintf(stderr, "threadid %p port %p: %s\r\n", erl_drv_thread_self(), d->port, buf);
|
|
}
|
|
#endif
|
|
static void DBGCMDRC(PortData *d, int rc)
|
|
{
|
|
(void)fprintf(stderr, "threadid %p port %p: rc = %s (%d)\r\n",
|
|
erl_drv_thread_self(), d->port, rc == 0 ? "ok" : bdberl_rc_to_atom_str(rc), rc);
|
|
}
|
|
#endif // DEBUG
|