libdb/test/tcl/repmgr012.tcl

124 lines
3.9 KiB
Tcl
Raw Normal View History

2011-09-13 17:44:24 +00:00
# See the file LICENSE for redistribution information.
#
# Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved.
#
# $Id$
#
# TEST repmgr012
# TEST repmgr heartbeat test.
# TEST
# TEST Start an appointed master and one client site. Set heartbeat
# TEST send and monitor values and process some transactions. Stop
# TEST sending heartbeats from master and verify that client sees
# TEST a dropped connection.
# TEST
# TEST Run for btree only because access method shouldn't matter.
# TEST
proc repmgr012 { { niter 100 } { tnum "012" } args } {
    # Entry point for the repmgr heartbeat test.
    #
    #   niter - iteration count handed on to repmgr012_sub (default 100)
    #   tnum  - test number used in the progress messages (default "012")
    #   args  - extra arguments, normalised through convert_args
    #
    # The run is skipped when is_freebsd_test is 1; that flag is
    # presumably defined by the sourced include.tcl.
    source ./include.tcl

    # Only btree is exercised; the access method should not matter here.
    set method "btree"

    if { $is_freebsd_test != 1 } {
        set args [convert_args $method $args]
        puts "Repmgr$tnum ($method): repmgr heartbeat test."
        repmgr012_sub $method $niter $tnum $args
    } else {
        puts "Skipping replication manager test on FreeBSD platform."
        return
    }
}
proc repmgr012_sub { method niter tnum largs } {
    # Body of the repmgr heartbeat test.
    #
    #   method - access method passed to rep_test
    #   niter  - number of entries per rep_test run
    #   tnum   - test number used in the progress messages
    #   largs  - extra arguments appended to each rep_test invocation
    #
    # Steps: start a master and a client, replicate and verify two
    # batches of transactions (the second with heartbeats enabled),
    # then turn off the master's heartbeats and check that the client
    # holds an election and records a dropped connection.
    global testdir rep_verbose verbose_type

    set nsites 2

    # Optional verbose flags, spliced into both environment commands.
    if { $rep_verbose == 1 } {
        set verbargs " -verbose {$verbose_type on} "
    } else {
        set verbargs ""
    }

    env_cleanup $testdir

    set ports [available_ports $nsites]
    set master_port [lindex $ports 0]
    set client_port [lindex $ports 1]

    set masterdir $testdir/MASTERDIR
    set clientdir $testdir/CLIENTDIR
    file mkdir $masterdir $clientdir

    # The two sites get different connection retry timeouts so that,
    # if they collide while starting at the same time, they retry at
    # different times.

    # Bring up the master site.
    puts "\tRepmgr$tnum.a: Start a master."
    set masterenv [eval berkdb_env_noerr -create $verbargs \
        -errpfx MASTER -home $masterdir -txn -rep -thread]
    $masterenv repmgr -ack all \
        -timeout {connection_retry 20000000} \
        -local [list localhost $master_port] \
        -start master

    # Bring up the client site, pointing it at the master.
    puts "\tRepmgr$tnum.b: Start a client."
    set clientenv [eval berkdb_env_noerr -create $verbargs \
        -errpfx CLIENT -home $clientdir -txn -rep -thread]
    $clientenv repmgr -ack all \
        -timeout {connection_retry 10000000} \
        -local [list localhost $client_port] \
        -remote [list localhost $master_port] \
        -start client
    await_startup_done $clientenv

    # With -ack all, replication is complete before the repmgr send
    # function returns, and therefore before rep_test finishes.
    puts "\tRepmgr$tnum.c: Run first set of transactions at master."
    eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
    puts "\tRepmgr$tnum.d: Verifying client database contents."
    rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1

    # Timeout values are in microseconds.  The monitor timeout on the
    # client should be longer than the send interval on the master.
    puts "\tRepmgr$tnum.e: Set heartbeat timeouts."
    $masterenv repmgr -timeout {heartbeat_send 50000}
    $clientenv repmgr -timeout {heartbeat_monitor 90000}

    puts "\tRepmgr$tnum.f: Run second set of transactions at master."
    eval rep_test $method $masterenv NULL $niter $niter 0 0 $largs
    puts "\tRepmgr$tnum.g: Verifying client database contents."
    rep_verify $masterdir $masterenv $clientdir $clientenv 1 1 1

    # The client should react to losing the master connection by
    # holding an election.  Record the current statistics first so
    # that an increase can be detected afterwards.
    set init_eh [stat_field $clientenv rep_stat "Elections held"]
    set init_cd [stat_field $clientenv repmgr_stat "Connections dropped"]

    # The client's heartbeat monitoring granularity is below one
    # second, so waiting up to 5 seconds should be plenty of time for
    # it to notice the missing heartbeats and react.
    puts "\tRepmgr$tnum.h: Remove master heartbeat and wait."
    $masterenv repmgr -timeout {heartbeat_send 0}
    set max_wait 5
    # The condition is brace-quoted, so it is handed over unsubstituted;
    # presumably await_condition re-evaluates it in this scope until it
    # holds or max_wait is exhausted.  Keep the variable names in sync.
    await_condition {[stat_field $clientenv rep_stat \
        "Elections held"] > $init_eh} $max_wait

    # By now the dropped-connection counter must have gone up as well.
    set final_cd [stat_field $clientenv repmgr_stat "Connections dropped"]
    error_check_good conndrop [expr {$final_cd > $init_cd}] 1

    error_check_good client_close [$clientenv close] 0
    error_check_good master_close [$masterenv close] 0
}