libdb/test/tcl/rep094.tcl

241 lines
6.7 KiB
Tcl
Raw Normal View History

2011-09-13 17:44:24 +00:00
# See the file LICENSE for redistribution information.
#
# Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
#
# TEST rep094
# TEST Full election with less than a majority initially connected.
#
# TEST Cold-boot a 4-site group. The first two sites start quickly and
# TEST initiate an election. The other two sites don't join the election until
# TEST the middle of the long full election timeout period. It's important that
# TEST the number of sites that start immediately be a sub-majority, because
# TEST that's the case that used to have a bug in it [#18456].
#
proc rep094 { method { tnum "094" } args } {
    # Entry point for the rep094 test.
    #
    #   method - access method name; only btree is actually exercised.
    #   tnum   - test number used in progress messages (default "094").
    #   args   - accepted but not used by this proc.
    #
    # When the harness is merely asking which methods are valid
    # ($checking_valid_methods, expected to be in scope once include.tcl
    # has been sourced), the list of supported methods is returned and
    # nothing is run.
    source ./include.tcl

    if { $checking_valid_methods } {
        return { btree }
    }

    # Run the real test for btree; every other method is skipped.
    if { [is_btree $method] != 0 } {
        return [rep094_sub $method $tnum]
    }

    puts "Rep$tnum: Skipping for method $method."
    return
}
proc rep094_sub { method tnum } {
    # Body of the rep094 test: a full election in a 4-site group where only
    # a minority (two sites) is present when the election begins.
    #
    #   method - access method name (not referenced in this proc).
    #   tnum   - test number used in progress messages.
    #
    # Sequence:
    #   a. Open sites A and B as replication clients.
    #   b. Start an election at A with a long full-election timeout.
    #   c. Service A and B for a while, restarting elections on request,
    #      and fail if anybody is elected during that time.
    #   d-g. Open sites C and D in turn and have each start an election.
    #   Finally, wait for a master to emerge and for the clients to report
    #   "Startup complete", then shut everything down.
    #
    # Raises an error if an election succeeds before C and D have joined,
    # if a HOLDELECTION shows up after all four sites are participating, or
    # if no master has been chosen by the end of the final wait.
    global rep_verbose
    global testdir
    global verbose_type
    global repfiles_in_memory
    global elect_serial

    # The banner suffix must describe the configuration actually in use:
    # mention in-memory replication files only when they are enabled.
    if { $repfiles_in_memory } {
        set repmemargs "-rep_inmem_files "
        set msg ", with in-memory replication files"
    } else {
        set repmemargs ""
        set msg ""
    }
    puts "Rep$tnum: Full election starting with minority$msg."

    set verbargs ""
    if { $rep_verbose == 1 } {
        set verbargs " -verbose {$verbose_type on} "
    }

    env_cleanup $testdir
    replsetup [set qdir $testdir/MSGQUEUEDIR]

    set dira $testdir/SITEA
    set dirb $testdir/SITEB
    set dirc $testdir/SITEC
    set dird $testdir/SITED
    file mkdir $dira
    file mkdir $dirb
    file mkdir $dirc
    file mkdir $dird

    puts "\tRep$tnum.a: Boot first two sites."
    repladd 1
    set envcmda "berkdb_env_noerr -create -txn -errpfx SITEA \
        $repmemargs -event \
        $verbargs -home $dira -rep_transport \[list 1 replsend\]"
    set enva [eval $envcmda -rep_client]

    repladd 2
    set envcmdb "berkdb_env_noerr -create -txn -errpfx SITEB \
        $repmemargs -event \
        $verbargs -home $dirb -rep_transport \[list 2 replsend\]"
    set envb [eval $envcmdb -rep_client]

    set envlist "{$enva 1} {$envb 2}"
    process_msgs $envlist

    # Start an election at site A, with a generous full-election timeout (3
    # minutes).  (Note that specifying the timeout as a two-element list is
    # the (only) way to set a full election timeout via start_election.)
    #
    # Sites 3 and 4 are registered with the message layer now, before
    # their environments are opened further below.
    repladd 3
    repladd 4
    puts "\tRep$tnum.b: Start an election at site A."
    set pri 100
    set nsites 4
    set nvotes 3
    set envid 1
    set timeout [list [expr {10 * 1000000}] [expr {180 * 1000000}]]
    set elect_serial 1
    set pfx "A.1"
    start_election $pfx $qdir $dira $envid $nsites $nvotes $pri $timeout
    set elect_pipe($envid) $elect_serial

    # The standard run_election proc is not flexible enough for our needs
    # here, so we conduct our own customized version of the loop here.
    # What's different about our case here is that we want to start sites C
    # and D during the midst of the phase 1 timeout period.
    #
    # Each pass services sites A and B once.  Nobody may win yet, since
    # only two of the four sites are up.  The loop ends after at least 10
    # passes once the last pass saw no HOLDELECTION request, or after
    # wait_limit passes regardless.
    set wait_limit 20
    set done false
    for { set count 0 } { $count < $wait_limit && !$done} { incr count } {
        foreach pair $envlist {
            set env [lindex $pair 0]
            set envid [lindex $pair 1]
            if { [info exists elect_pipe($envid)] } {
                check_election $elect_pipe($envid) \
                    unavail child_elected
            } else {
                set child_elected false
            }
            set parent_elected [is_elected $env]
            if { $child_elected || $parent_elected } {
                error "FAIL: election succeeded unexpectedly"
            }

            # "he" is set by replprocessqueue; when true, this site has
            # been asked to hold an election, so start a fresh one.
            replprocessqueue $env $envid 0 he
            if { $he } {
                incr elect_serial
                set envpfx [$env get_errpfx]
                set pfx "$envpfx.$elect_serial"
                puts "\tRep$tnum.c: Starting another\
                    election $pfx at $envpfx."
                set dir [$env get_home]
                start_election $pfx $qdir $dir $envid \
                    $nsites $nvotes $pri $timeout
                set elect_pipe($envid) $elect_serial
            }
        }
        # $he here reflects the last site serviced in this pass.  Stop
        # waiting once 10 passes have gone by and that site was quiet;
        # otherwise sleep a second and go around again.
        if { $count >= 10 && !$he } {
            set done true
        } else {
            tclsleep 1
        }
    }

    # Now that we've waited 10 seconds, start the "slow-booting"
    # sites C and D.
    #
    puts "\tRep$tnum.d: Boot third site, have it join election."
    set envcmdc "berkdb_env_noerr -create -txn -errpfx SITEC \
        $repmemargs -event \
        $verbargs -home $dirc -rep_transport \[list 3 replsend\]"
    set envc [eval $envcmdc -rep_client]
    set envlist "{$enva 1} {$envb 2} {$envc 3}"
    process_msgs $envlist

    incr elect_serial
    set env $envc
    set envid 3
    set envpfx [$env get_errpfx]
    set pfx "$envpfx.$elect_serial"
    puts "\tRep$tnum.e: Starting straggler election $pfx at $envpfx."
    set dir [$env get_home]
    start_election $pfx $qdir $dir $envid $nsites $nvotes $pri $timeout
    set elect_pipe($envid) $elect_serial

    # Pump messages among the three sites for about five seconds before
    # bringing up the last one.
    for { set count 0 } { $count < 5 } { incr count } {
        process_msgs $envlist
        tclsleep 1
    }

    puts "\tRep$tnum.f: Boot fourth site, join election."
    set envcmdd "berkdb_env_noerr -create -txn -errpfx SITED \
        $repmemargs -event \
        $verbargs -home $dird -rep_transport \[list 4 replsend\]"
    set envd [eval $envcmdd -rep_client]
    set envlist "{$enva 1} {$envb 2} {$envc 3} {$envd 4}"
    process_msgs $envlist

    incr elect_serial
    set env $envd
    set envid 4
    set envpfx [$env get_errpfx]
    set pfx "$envpfx.$elect_serial"
    puts "\tRep$tnum.g: Starting straggler election $pfx at $envpfx."
    set dir [$env get_home]
    start_election $pfx $qdir $dir $envid $nsites $nvotes $pri $timeout
    set elect_pipe($envid) $elect_serial
    process_msgs $envlist

    # The election should now complete promptly.  We'll give it 60 seconds,
    # just in case this test is running on a slow overloaded system.  That's
    # still way less than the 3 minutes total full election timeout, so if
    # we finish before 60 seconds, it means we succeeded.
    #
    set wait_limit 60
    set master -1
    for { set count 0 } { $count < $wait_limit } { incr count } {
        # Both counters are recomputed from scratch on every pass.
        set finishers 0
        set synced_clients 0
        foreach pair $envlist {
            set env [lindex $pair 0]
            set envid [lindex $pair 1]
            if { [info exists elect_pipe($envid)] } {
                if {[check_election $elect_pipe($envid) \
                    unavail child_elected]} {
                    incr finishers
                }
            } else {
                set child_elected false
            }
            set parent_elected [is_elected $env]

            # The first site seen to have won is promoted to master;
            # later winners (if any) are ignored.
            if { ( $child_elected || $parent_elected ) &&
                $master == -1 } {
                set master $envid
                $env rep_start -master
            }

            replprocessqueue $env $envid 0 he
            if { $he } {
                error "FAIL: got HOLDELECTION unexpectedly"
            }

            # Once we've elected a master, start checking for
            # startupdone at the clients, just so's we can
            # gracefully shut everything down.
            #
            if { $master > 0 && $envid != $master &&
                [stat_field $env rep_stat "Startup complete"] } {
                incr synced_clients
            }
        }
        # Finished when every site's election has reported in and all
        # non-master sites have completed startup.
        if { $finishers == $nsites && $synced_clients == $nsites - 1 } {
            break
        }
        tclsleep 1
    }
    error_check_good got_newmaster [expr {$master > 0}] 1

    cleanup_elections
    $enva close
    $envb close
    $envc close
    $envd close
    replclose $qdir
}