2011-09-13 17:48:33 +00:00
|
|
|
# See the file LICENSE for redistribution information.
|
|
|
|
#
|
2012-11-14 20:13:24 +00:00
|
|
|
# Copyright (c) 2009, 2012 Oracle and/or its affiliates. All rights reserved.
|
2011-09-13 17:48:33 +00:00
|
|
|
#
|
|
|
|
# TEST repmgr026
|
|
|
|
# TEST Test of "full election" timeouts.
|
|
|
|
# TEST 1. Cold boot with all sites present.
|
|
|
|
# TEST 2. Cold boot with some sites missing.
|
|
|
|
# TEST 3. Partial-participation election with one client having seen a master,
|
|
|
|
# TEST but another just starting up fresh.
|
|
|
|
# TEST 4. Partial participation, with all participants already having seen a
|
|
|
|
# TEST master.
|
|
|
|
# TEST
|
|
|
|
|
|
|
|
# Entry point for the "full election" timeout test.  Runs repmgr026_sub
# once for each combination of lease usage (no/yes) and fifth-client
# availability (no/yes).
#
# tnum - test number used as the prefix of progress messages.
proc repmgr026 { { tnum 026 } } {
	source ./include.tcl

	# is_freebsd_test is expected to come from include.tcl; replication
	# manager tests are skipped entirely when it is set.
	if { $is_freebsd_test == 1 } {
		puts "Skipping replication manager test on FreeBSD platform."
		return
	}

	# Outer loop: lease setting.  Inner loop: whether site E stays down.
	foreach leases {no yes} {
		foreach down {no yes} {
			puts "Repmgr$tnum: Full election test, \
			    client_down: $down; leases: $leases"
			repmgr026_sub $tnum $down $leases
		}
	}
}
|
|
|
|
|
|
|
|
# Run one pass of the full-election timeout test.
#
# tnum        - test number used as the prefix of progress messages.
# client_down - boolean (no/yes): when true, site E is left down for the
#               cold boot, so the first election cannot have all sites.
# use_leases  - boolean (no/yes): when true, every environment is opened
#               with a -rep_lease setting.
#
# The pass has three phases:
#   1. Create a five-site group (A is the creator) and shut it down.
#   2. Cold boot A-D (and E unless client_down) with A at the highest
#      priority, and time the first election.
#   3. Shut down master A (and E, if it was running) and time the
#      election among B, C and D.
proc repmgr026_sub { tnum client_down use_leases } {
	global testdir
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	# One home directory and one port per site.
	env_cleanup $testdir
	file mkdir [set dira $testdir/SITE_A]
	file mkdir [set dirb $testdir/SITE_B]
	file mkdir [set dirc $testdir/SITE_C]
	file mkdir [set dird $testdir/SITE_D]
	file mkdir [set dire $testdir/SITE_E]
	foreach { porta portb portc portd porte } [available_ports 5] {}

	# First, just create/establish the group.
	puts -nonewline "Repmgr$tnum: Create a group of 5 sites: "
	set common "-create -txn $verbargs $repmemargs \
	    -rep -thread -event"
	if { $use_leases } {
		append common " -rep_lease {[list 3000000]} "
	}
	set cmda "berkdb_env_noerr $common -errpfx SITE_A -home $dira"
	set cmdb "berkdb_env_noerr $common -errpfx SITE_B -home $dirb"
	set cmdc "berkdb_env_noerr $common -errpfx SITE_C -home $dirc"
	set cmdd "berkdb_env_noerr $common -errpfx SITE_D -home $dird"
	set cmde "berkdb_env_noerr $common -errpfx SITE_E -home $dire"

	# Timeouts shared by every site.  The full_election value is the
	# "60 seconds" that the timing checks below are measured against.
	set common_mgr " -start elect \
	    -timeout {connection_retry 5000000} \
	    -timeout {election_retry 2000000} \
	    -timeout {full_election 60000000} \
	    -timeout {election 5000000} -timeout {ack 3000000}"

	# Site A creates the group; B through E each join via A and are
	# waited on before the next site is started.
	set enva [eval $cmda]
	eval $enva repmgr $common_mgr \
	    -local {[list 127.0.0.1 $porta creator]}
	puts -nonewline "." ; flush stdout
	set envb [eval $cmdb]
	eval $envb repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portb]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envb
	puts -nonewline "." ; flush stdout
	set envc [eval $cmdc]
	eval $envc repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portc]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envc
	puts -nonewline "." ; flush stdout
	set envd [eval $cmdd]
	eval $envd repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portd]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envd
	puts -nonewline "." ; flush stdout
	set enve [eval $cmde]
	eval $enve repmgr $common_mgr \
	    -local {[list 127.0.0.1 $porte]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $enve
	puts "."

	# Take the whole group down so that it can be cold booted.
	$enve close
	$envd close
	$envc close
	$envb close
	$enva close

	# Cold boot the group (with or without site E), giving site A a
	# high priority.
	#
	# The wait limits are intended to be an amount that is way more than
	# the expected timeout, used for nothing more than preventing the test
	# from hanging forever.  The leeway amount should be enough less than
	# the timeout to allow for any imprecision introduced by the test
	# mechanism.
	#
	set elect_wait_limit 25
	set full_secs_leeway 59
	set full_wait_limit 85

	puts "\tRepmgr$tnum.a: Start first four sites."
	set enva [eval $cmda]
	eval $enva repmgr $common_mgr -pri 200 -local {[list 127.0.0.1 $porta]}

	set envb [eval $cmdb]
	eval $envb repmgr $common_mgr -pri 100 -local {[list 127.0.0.1 $portb]}

	set envc [eval $cmdc]
	eval $envc repmgr $common_mgr -pri 90 -local {[list 127.0.0.1 $portc]}

	set envd [eval $cmdd]
	eval $envd repmgr $common_mgr -pri 80 -local {[list 127.0.0.1 $portd]}

	# "NONE" is the sentinel meaning that site E was not started.
	if { $client_down } {
		set enve NONE
	} else {
		puts "\tRepmgr$tnum.b: Start fifth site."
		set enve [eval $cmde]
		eval $enve repmgr $common_mgr -pri 50 \
		    -local {[list 127.0.0.1 $porte]}
	}

	# wait for results, and make sure they're correct
	#
	# The expected winner (site A) must be first in the list.
	set envlist [list $enva $envb $envc $envd]
	if { $enve != "NONE" } {
		lappend envlist $enve
	}
	set limit $full_wait_limit
	puts "\tRepmgr$tnum.c: wait (up to $limit seconds) for first election."
	set t [repmgr026_await_election_result $envlist $limit]
	if { $client_down } {
		# With a site missing, the election is expected to take
		# longer than the leeway, i.e. close to the full 60 seconds.
		error_check_good slow_election \
		    [expr {$t > $full_secs_leeway}] 1
	} else {
		# When all sites participate, the election should finish in way
		# less than 60 seconds.
		#
		error_check_good timely_election \
		    [expr {$t < $full_secs_leeway}] 1
	}
	puts "\tRepmgr$tnum.d: first election completed in $t seconds"

	# Clear recorded events on every running site, so that the second
	# election's readiness check does not see events from the first.
	puts "\tRepmgr$tnum.e: wait for start-up done"
	$enva event_info -clear
	await_startup_done $envb
	$envb event_info -clear
	await_startup_done $envc
	$envc event_info -clear
	await_startup_done $envd
	$envd event_info -clear
	if { $enve != "NONE" } {
		await_startup_done $enve
		$enve event_info -clear
	}

	# Shut down site A, in order to test elections with less than the whole
	# group voting.  However, normally repmgr's reaction to losing master
	# connection is to try a "fast election" (the n-1 trick).  So we must do
	# something to mitigate that (see below).
	#
	puts "\tRepmgr$tnum.f: shut down master site A"
	if { $client_down } {
		# The fifth site is already down, so now we'll have just B, C,
		# and D running.  Therefore, even with repmgr pulling its "fast
		# election" (n-1) trick, we don't have enough votes for a
		# full-participation short circuit; so this is a valid test of
		# the "normal" election timeout.
		#
		$enva close
	} else {
		# Here all sites are running, so if we just killed the master
		# repmgr would invoke its "fast election" trick, resulting in no
		# timeout.  Since the purpose of this test is to ensure the
		# correct use of timeouts, that's no good.  Instead, let's first
		# kill one more other site.
		$enve close
		$enva close
	}

	# wait for results, and check them
	#
	# Site B is listed first because it is the expected winner.
	set envlist [list $envb $envc $envd]
	set limit $elect_wait_limit
	puts "\tRepmgr$tnum.h: wait (up to $limit seconds) for second election."
	set t [repmgr026_await_election_result $envlist $limit]
	error_check_good normal_election [expr {$t < $full_secs_leeway}] 1
	puts "\tRepmgr$tnum.i: second election completed in $t seconds"

	$envd close
	$envc close
	$envb close
}
|
|
|
|
|
|
|
|
# Wait (a limited amount of time) for the election to finish.  The first env
# handle in the list is the expected winner, and the others are the remaining
# clients.  Returns the approximate amount of time (in seconds) that the
# election took.
#
# envlist - list of env handles, expected winner first.
# limit   - maximum number of seconds to wait.
#
# Polls repmgr026_is_ready once per second.  Raises an error ("FAIL: time
# limit exceeded") if the clock passes the deadline before the sites are
# ready; the deadline is tested before each readiness check.
#
proc repmgr026_await_election_result { envlist limit } {
	set begin [clock seconds]
	set deadline [expr {$begin + $limit}]
	while { true } {
		set t [clock seconds]
		if { $t > $deadline } {
			error "FAIL: time limit exceeded"
		}

		if { [repmgr026_is_ready $envlist] } {
			# Elapsed time is measured from the sample taken
			# just before this successful check.
			return [expr {$t - $begin}]
		}

		tclsleep 1
	}
}
|
|
|
|
|
|
|
|
# Report whether the election outcome is visible at every site.
#
# envlist - list of env handles; the first is the expected winner and the
#           rest are the clients.
#
# Returns true only when is_elected succeeds for the first handle and
# is_event_present finds a "newmaster" event on each remaining handle;
# returns false as soon as any of those checks fails.
proc repmgr026_is_ready { envlist } {
	# The head of the list must have won the election.
	if { ![is_elected [lindex $envlist 0]] } {
		return false
	}

	# Every other site must have recorded a newmaster event.
	foreach site [lrange $envlist 1 end] {
		if { [is_event_present $site newmaster] } {
			continue
		}
		return false
	}
	return true
}
|