libdb/test/tcl/rep067.tcl
2012-11-14 15:13:24 -05:00

323 lines
9.5 KiB
Tcl

# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002, 2012 Oracle and/or its affiliates. All rights reserved.
#
# TEST rep067
# TEST Full election timeout test.
# TEST
# TEST Verify that elections use a separate "full election timeout" (if such
# TEST configuration is in use) instead of the normal timeout, when the
# TEST replication group is "cold-booted" (all sites starting with recovery).
# TEST
proc rep067 { method args } {
    # Entry point for the full election timeout test.
    #
    # Arguments:
    #   method  Access method name; anything that is_btree rejects is skipped.
    #   args    Accepted but not used by this proc.
    #
    # Returns the list of supported methods when checking_valid_methods is
    # set; otherwise returns nothing after running the sub-tests.
    source ./include.tcl
    set tnum "067"

    # Only btree is supported: report that when asked for valid methods.
    if { $checking_valid_methods } {
        set test_methods { btree }
        return $test_methods
    }

    set btree_check [is_btree $method]
    if { $btree_check == 0 } {
        puts "Rep$tnum: Skipping for method $method."
        return
    }

    puts "Rep$tnum: Full election timeout test."

    # Three cases in total.  The first two differ only in whether one
    # client is missing, so they share rep067a_sub and are selected by its
    # second argument.  The third case has its own proc, rep067b_sub.
    foreach one_client_missing { yes no } {
        rep067a_sub $tnum $one_client_missing
    }
    rep067b_sub $tnum
}
# Cold boot the group. Sites A and B come up just fine, but site C (the
# highest-priority site) might not come up (depending on the client_down flag).
# Hold an election. (The amount of time it takes depends on whether site C is
# running.) Then, if all three sites were running, shut down the elected master
# (expected to be site C) and hold another election between the remaining two
# sites.
#
proc rep067a_sub { tnum client_down } {
    # Cold-boot election cases of rep067.
    #
    # Arguments:
    #   tnum         Test number used as the prefix of progress messages.
    #   client_down  Boolean.  When true, only two of the three configured
    #                clients are started and the first election is expected
    #                to take the full election timeout.  When false, all
    #                three are started, the first election is expected to
    #                finish quickly, and a second election is held after
    #                the elected master has been shut down.
    #
    # Timing expectations are checked with error_check_good.
    source ./include.tcl
    global rand_init
    error_check_good set_random_seed [berkdb srand $rand_init] 0
    global repfiles_in_memory
    global rep_verbose
    global verbose_type

    # Set up some arbitrary timeout values for this test.  The only
    # constraint is that they should be large enough, and different enough,
    # so as to allow for some amount of measurement imprecision introduced
    # by the overhead of the test mechanism.  Timeout values themselves
    # expressed in microseconds, since they'll be passed to DB; leeway
    # values in seconds, so that we can measure the result here in Tcl.
    #
    set elect_to 15000000
    set elect_secs_leeway 13
    set full_elect_to 30000000
    set full_secs_leeway 27

    puts -nonewline "Rep$tnum.a: Full election test, "
    if { $client_down } {
        puts "with one client missing"
        puts -nonewline "\tRep$tnum.b: First election"
        puts " expected to take [expr {$full_elect_to / 1000000}] seconds"
    } else {
        puts "with all clients initially present"
        puts "\tRep$tnum.b: First election expected to complete quickly"
    }

    set verbargs ""
    if { $rep_verbose == 1 } {
        set verbargs " -verbose {$verbose_type on} "
    }

    set repmemargs ""
    if { $repfiles_in_memory } {
        set repmemargs "-rep_inmem_files "
    }

    env_cleanup $testdir

    set qdir $testdir/MSGQUEUEDIR
    replsetup $qdir

    # Configure all three clients.  Use EID's starting at 2, because that's
    # what run_election expects.  Priorities increase with the array index
    # so that the last site started is always the expected winner.
    #
    set nsites 3
    foreach i { 0 1 2 } eid { 2 3 4 } p { 20 50 100 } {
        set clientdir($i) $testdir/CLIENTDIR.$i
        file mkdir $clientdir($i)

        repladd $eid
        set env_cmd($i) "berkdb_env_noerr -create \
            -event $repmemargs -home $clientdir($i) \
            -txn -rep_client $verbargs \
            -errpfx CLIENT.$i -rep_transport \[list $eid replsend\]"

        set errcmd($i) "none"
        set crash($i) 0
        set pri($i) $p
    }
    # Both timeouts travel together: normal first, full-election second.
    set elect_timeout [list $elect_to $full_elect_to]

    # Start the clients, but perhaps not all of them.
    #
    set envlist {}
    if { $client_down } {
        set participants 2
    } else {
        set participants 3
    }
    for { set i 0 } { $i < $participants } { incr i } {
        set clientenv($i) [eval $env_cmd($i)]
        set eid [expr {$i + 2}]
        lappend envlist "$clientenv($i) $eid"
    }

    process_msgs $envlist

    # In this test, the expected winner is always the last one in the
    # array.  We made sure of that by arranging the priorities that way.
    # This is convenient so that we can remove the winner (master) in the
    # second phase, without leaving a hole in the arrays that the
    # run_election proc wouldn't cope with.
    #
    set winner [expr {$participants - 1}]
    set initiator 0
    set nvotes 2
    set reopen_flag 0
    run_election envlist errcmd pri crash \
        $qdir "Rep$tnum.c" $initiator $nsites $nvotes $participants \
        $winner $reopen_flag NULL 0 0 $elect_timeout
    set duration [rep067_max_duration $envlist]
    puts "\tRep$tnum.d: the election took about $duration seconds"

    if { $client_down } {
        # Case #2.
        #
        # Without full participation on a cold boot, the election should
        # take the full long timeout.  In any case it should be way more
        # than the "normal" timeout.
        #
        error_check_good duration1a \
            [expr {$duration > $full_secs_leeway}] 1
    } else {
        # Case #1.
        #
        # With full participation, the election should complete "right
        # away".  At least it should be way less than the "normal"
        # election timeout.
        error_check_good duration1b \
            [expr {$duration < $elect_secs_leeway}] 1
    }

    process_msgs $envlist

    if { !$client_down } {
        # Shut down the master and hold another election between the
        # remaining two sites.
        #
        puts "\tRep$tnum.e: Shut down elected master, and run another election"
        puts "\tRep$tnum.g: (expected to take [expr {$elect_to / 1000000}] seconds)"
        $clientenv($winner) close
        set envlist [lreplace $envlist $winner $winner]

        set winner 1
        set participants 2
        # This election is step "f": it follows step e above and precedes
        # the step g report below.  It was previously mislabelled "b",
        # which collided with the first-election message.
        run_election envlist errcmd pri crash \
            $qdir "Rep$tnum.f" $initiator $nsites $nvotes \
            $participants $winner $reopen_flag NULL 0 0 $elect_timeout
        set duration [rep067_max_duration $envlist]

        # We don't have full participation, so the election can only be
        # won after a timeout.  But these clients have seen a master, so
        # we shouldn't have to wait for the full-election timeout.
        #
        puts "\tRep$tnum.g: the election took about $duration seconds"
        error_check_good duration2 \
            [expr {$duration > $elect_secs_leeway && \
            $duration < $full_secs_leeway}] 1
    }

    # Clients 0 and 1 are open in both cases; client 2, if it was started,
    # was already closed above as the elected master.
    $clientenv(0) close
    $clientenv(1) close

    replclose $testdir/MSGQUEUEDIR
}
# Run an election where one of the clients has seen a master, but the other has
# not. Verify that the client that has not seen a master learns from the other
# one that a master has been seen, and allows the election to complete after
# the normal timeout, rather than the full election timeout.
#
proc rep067b_sub { tnum } {
# Mixed-history election case of rep067.
#
# Arguments:
#   tnum  Test number used as the prefix of progress messages.
#
# Outline: start a master and one client so that the client has heard
# from a master, close the master, start a second client, then run an
# election between the two clients and check how long it took.
source ./include.tcl
global rand_init
global repfiles_in_memory
global rep_verbose
global verbose_type
error_check_good set_random_seed [berkdb srand $rand_init] 0
# elect_to and full_elect_to are in microseconds (the progress message
# below divides elect_to by 1000000 to print seconds); the *_leeway
# values are the bounds, in seconds, used in the final duration check.
# NOTE(review): elect_secs_leeway equals the normal timeout in whole
# seconds, so the lower-bound check has no margin under the timeout --
# confirm this is intended.
set elect_to 10000000
set elect_secs_leeway 10
set full_elect_to 180000000
set full_secs_leeway 100
puts "Rep$tnum.a: Mixed full election test"
# Optional environment arguments selected by global test settings.
set verbargs ""
if { $rep_verbose == 1 } {
set verbargs " -verbose {$verbose_type on} "
}
set repmemargs ""
if { $repfiles_in_memory } {
set repmemargs "-rep_inmem_files "
}
env_cleanup $testdir
set qdir $testdir/MSGQUEUEDIR
replsetup $qdir
# Start a master and one client. This first step is just setup, for the
# purpose of creating a client that has heard from a master.
#
file mkdir $testdir/MASTERDIR
set mcmd "berkdb_env_noerr -create \
-event $repmemargs -home $testdir/MASTERDIR \
-txn -rep_master $verbargs \
-errpfx MASTER -rep_transport \[list 1 replsend\]"
file mkdir $testdir/CLIENTDIR
set ccmd "berkdb_env_noerr -create \
-event $repmemargs -home $testdir/CLIENTDIR \
-txn -rep_client $verbargs \
-errpfx CLIENT.0 -rep_transport \[list 2 replsend\]"
puts "\tRep$tnum.b: Start master and first client"
# Each EID is registered with repladd before the environment that uses
# it is opened.
repladd 1
set menv [eval $mcmd]
repladd 2
set cenv [eval $ccmd]
# Exchange messages between master (EID 1) and the first client (EID 2).
process_msgs [list [list $menv 1] [list $cenv 2]]
puts "\tRep$tnum.c: Shut down master; start other client"
$menv close
# Now set up for the election test we're really interested in. We'll
# need $ccmd in array position 0 of env_cmd, for passing to
# run_election. Then, start the second client. We now have a mixture
# of clients: one who's seen a master, and the other who hasn't.
#
# The run_election proc assumes an offset of 2 between the array index
# and the EID. Thus EID 3 has to correspond to array index 1, etc.
#
# NOTE(review): env_cmd is not passed as an argument to run_election
# below; presumably run_election reads it from this frame by name --
# confirm before renaming it.
set env_cmd(0) $ccmd
repladd 3
file mkdir $testdir/CLIENTDIR2
set env_cmd(1) "berkdb_env_noerr -create \
-event $repmemargs -home $testdir/CLIENTDIR2 \
-txn -rep_client $verbargs \
-errpfx CLIENT.1 -rep_transport \[list 3 replsend\]"
set c2env [eval $env_cmd(1)]
# Build the per-site arrays and the env/EID list.  The first client
# gets the higher priority (100 versus 50), matching winner 0 below.
set envlist {}
foreach i { 0 1 } eid { 2 3 } p { 100 50 } e [list $cenv $c2env] {
set errcmd($i) "none"
set crash($i) 0
set pri($i) $p
lappend envlist [list $e $eid]
}
# Normal timeout first, full-election timeout second.
set elect_timeout [list $elect_to $full_elect_to]
# Three sites are configured in the group, but only the two clients
# take part now that the master has been closed.
set nsites 3
set participants 2
process_msgs $envlist
puts "\tRep$tnum.d: Election expected to take [expr $elect_to / 1000000] seconds"
set winner 0
set initiator 0
set nvotes 2
set reopen_flag 0
run_election envlist errcmd pri crash \
$qdir "Rep$tnum.e" $initiator $nsites $nvotes $participants \
$winner $reopen_flag NULL 0 0 $elect_timeout
# Longest election time, in seconds, reported by either client.
set duration [rep067_max_duration $envlist]
puts "\tRep$tnum.f: the election took about $duration seconds"
# We don't have full participation, so the election can only be won
# after a timeout. But even if only one client has seen a master, we
# shouldn't have to wait for the full-election timeout.
#
error_check_good duration3 \
[expr $duration > $elect_secs_leeway && \
$duration < $full_secs_leeway] 1
$cenv close
$c2env close
replclose $testdir/MSGQUEUEDIR
}
proc rep067_max_duration { envlist } {
    # Return the largest election duration, in seconds, among the
    # environments in envlist.
    #
    # Arguments:
    #   envlist  List of pairs whose first element is an environment
    #            handle; the second element is not used here.
    #
    # Each duration is the "Election seconds" rep_stat field plus the
    # "Election usecs" field scaled to seconds.  An empty list gives 0.0.
    set longest 0.0
    foreach entry $envlist {
        set handle [lindex $entry 0]
        set whole_secs [stat_field $handle rep_stat "Election seconds"]
        set micro_secs [stat_field $handle rep_stat "Election usecs"]
        set elapsed [expr {$whole_secs + $micro_secs / 1000000.0}]
        if { $elapsed > $longest } {
            set longest $elapsed
        }
    }
    return $longest
}