# libdb/test/tcl/rep034.tcl
#
# 398 lines
# 13 KiB
# Tcl

# See the file LICENSE for redistribution information.
#
# Copyright (c) 2004, 2011 Oracle and/or its affiliates. All rights reserved.
#
# $Id$
#
# TEST rep034
# TEST Test of STARTUPDONE notification.
# TEST
# TEST STARTUPDONE can now be recognized without the need for new "live" log
# TEST records from the master (under favorable conditions). The response to
# TEST the ALL_REQ at the end of synchronization includes an end-of-log marker
# TEST that now triggers it. However, the message containing that end marker
# TEST could get lost, so live log records still serve as a back-up mechanism.
# TEST The end marker may also be set under c2c sync, but only if the serving
# TEST client has itself achieved STARTUPDONE.
#
proc rep034 { method { niter 2 } { tnum "034" } args } {
    # include.tcl is sourced into this proc's scope before anything else.
    source ./include.tcl

    global databases_in_memory repfiles_in_memory env_private

    # Valid for all access methods.
    if { $checking_valid_methods } {
        return "ALL"
    }

    # With named in-memory databases, methods for which is_queueext is
    # true are skipped outright.
    if { $databases_in_memory && [is_queueext $method] } {
        puts -nonewline "Skipping rep$tnum for method "
        puts "$method with named in-memory databases."
        return
    }

    # Describe the configuration under test for the banner line below.
    set db_desc [expr {$databases_in_memory ?
        "using named in-memory databases" : "using on-disk databases"}]
    set rep_desc [expr {$repfiles_in_memory ?
        "and in-memory replication files" : "and on-disk replication files"}]
    set priv_desc [expr {$env_private ? "with private env" : ""}]

    set args [convert_args $method $args]

    # Run the sub-test once per log-type combination.  Each logset lists
    # the log type for the master, client 0 and client 1, in that order.
    foreach logset [create_logsets 3] {
        puts "Rep$tnum ($method $args): Test of\
            startup synchronization detection $db_desc $rep_desc $priv_desc."
        foreach site {Master "Client 0" "Client 1"} idx {0 1 2} {
            puts "Rep$tnum: $site logs are [lindex $logset $idx]"
        }
        rep034_sub $method $niter $tnum $logset $args
    }
}
# Run the STARTUPDONE scenarios for one combination of log types.
#
# This test manages on its own the decision of whether or not to open an
# environment with recovery.  (It varies throughout the test.)  Therefore
# there is no need to run it twice (as we often do with a loop in the main
# proc).
#
# Arguments:
#   method  access method, passed through to rep_test
#   niter   iteration count, passed through to rep_test
#   tnum    test number, used in progress messages
#   logset  list of three log types: master, client 1, client 2
#   largs   extra arguments appended to every rep_test invocation
#
# The global "anywhere" is toggled during the test (0 while the master
# serves requests, 1 for client-to-client sync) and is reset to 0 on every
# exit path, including the early "skip the rest" returns.
#
proc rep034_sub { method niter tnum logset largs } {
    global anywhere
    global testdir
    global databases_in_memory
    global repfiles_in_memory
    global env_private
    global rep_verbose
    global verbose_type
    global rep034_got_allreq

    # Optional pieces of the environment-open command lines.
    set verbargs ""
    if { $rep_verbose == 1 } {
        set verbargs " -verbose {$verbose_type on} "
    }

    set repmemargs ""
    if { $repfiles_in_memory } {
        set repmemargs "-rep_inmem_files "
    }

    set privargs ""
    if { $env_private == 1 } {
        set privargs " -private "
    }

    env_cleanup $testdir

    replsetup $testdir/MSGQUEUEDIR

    set masterdir $testdir/MASTERDIR
    set clientdir $testdir/CLIENTDIR
    set clientdir2 $testdir/CLIENTDIR2

    file mkdir $masterdir
    file mkdir $clientdir
    file mkdir $clientdir2

    set m_logtype [lindex $logset 0]
    set c_logtype [lindex $logset 1]
    set c2_logtype [lindex $logset 2]

    # In-memory logs require a large log buffer, and cannot
    # be used with -txn nosync.
    set m_logargs [adjust_logargs $m_logtype]
    set c_logargs [adjust_logargs $c_logtype]
    set c2_logargs [adjust_logargs $c2_logtype]
    set m_txnargs [adjust_txnargs $m_logtype]
    set c_txnargs [adjust_txnargs $c_logtype]
    set c2_txnargs [adjust_txnargs $c2_logtype]

    # In first part of test master serves requests.
    #
    set anywhere 0

    # Create a master; add some data.
    #
    repladd 1
    set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \
        -event $verbargs -errpfx MASTER $repmemargs $privargs \
        -home $masterdir -rep_master -rep_transport \[list 1 replsend\]"
    set masterenv [eval $ma_envcmd]
    puts "\tRep$tnum.a: Create master; add some data."
    eval rep_test $method $masterenv NULL $niter 0 0 0 $largs

    # Bring up a new client, and see that it can get STARTUPDONE with no new
    # live transactions at the master.
    #
    puts "\tRep$tnum.b: Bring up client; check STARTUPDONE."
    repladd 2
    set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \
        -event $verbargs -errpfx CLIENT $repmemargs $privargs \
        -home $clientdir -rep_client -rep_transport \[list 2 replsend\]"
    set clientenv [eval $cl_envcmd]
    set envlist "{$masterenv 1} {$clientenv 2}"
    process_msgs $envlist

    error_check_good done_without_live_txns \
        [stat_field $clientenv rep_stat "Startup complete"] 1

    # Test that the event got fired as well.  In the rest of the test things
    # get a little complex (what with having two clients), so only check the
    # event part here.  The important point is the various ways that
    # STARTUPDONE can be computed, so testing the event firing mechanism
    # just this once is enough.
    #
    error_check_good done_event_too [is_startup_done $clientenv] 1

    #
    # Bring up another client.  Do additional new txns at master, ensure
    # that STARTUPDONE is not triggered at NEWMASTER LSN.
    #
    puts "\tRep$tnum.c: Another client; no STARTUPDONE at NEWMASTER LSN."
    set newmaster_lsn [next_expected_lsn $masterenv]
    repladd 3
    #
    # !!! Please note that we're giving client2 a special customized version
    # of the replication transport call-back function.
    #
    set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $c2_logargs \
        -event $verbargs -errpfx CLIENT2 $repmemargs $privargs \
        -home $clientdir2 -rep_client -rep_transport \[list 3 rep034_send\]"
    set client2env [eval $cl2_envcmd]

    set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"
    set verified false
    for {set i 0} {$i < 10} {incr i} {
        proc_msgs_once $envlist
        set client2lsn [next_expected_lsn $client2env]

        # Get to the point where we've gone past where the master's LSN
        # was at NEWMASTER time, and make sure we haven't yet gotten
        # STARTUPDONE.  Ten loop iterations should be plenty.
        #
        if {[$client2env log_compare $client2lsn $newmaster_lsn] > 0} {
            if {![stat_field \
                $client2env rep_stat "Startup complete"]} {
                set verified true
            }
            break;
        }

        # Not past the NEWMASTER LSN yet: generate more live txns.
        eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
    }
    error_check_good no_newmaster_trigger $verified true

    process_msgs $envlist
    error_check_good done_during_live_txns \
        [stat_field $client2env rep_stat "Startup complete"] 1

    #
    # From here on out we use client-to-client sync.
    #
    set anywhere 1

    # Here we rely on recovery at client 1.  If that client is running with
    # in-memory logs or in-memory databases, forgo the remainder of the test.
    #
    if {$c_logtype eq "in-mem" || $databases_in_memory } {
        puts "\tRep$tnum.d: Skip the rest of the test for\
            in-memory logging or databases."
        $masterenv close
        $clientenv close
        $client2env close
        replclose $testdir/MSGQUEUEDIR
        # Restore the global exactly as the normal exit path does, so the
        # c2c setting does not leak out of this test.
        set anywhere 0
        return
    }

    # Shut down client 1.  Bring it back, with recovery.  Verify that it can
    # get STARTUPDONE by syncing to other client, even with no new master
    # txns.
    #
    puts "\tRep$tnum.d: Verify STARTUPDONE using c2c sync."
    $clientenv close
    set clientenv [eval $cl_envcmd -recover]
    set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"

    # Clear counters at client2, so that we can check "Client service
    # requests" in a moment.
    #
    $client2env rep_stat -clear
    process_msgs $envlist
    error_check_good done_via_c2c \
        [stat_field $clientenv rep_stat "Startup complete"] 1
    #
    # Make sure our request was served by client2.  This isn't a test of c2c
    # sync per se, but if this fails it indicates that we're not really
    # testing what we thought we were testing.
    #
    error_check_bad c2c_served_by_master \
        [stat_field $client2env rep_stat "Client service requests"] 0

    # Verify that we don't get STARTUPDONE if we are using c2c sync to
    # another client, and the serving client has not itself reached
    # STARTUPDONE, because that suggests that the serving client could be
    # way far behind.  But that we can still eventually get STARTUPDONE, as
    # a fall-back, once the master starts generating new txns again.
    #
    # To do so, we'll need to restart both clients.  Start with the client
    # that will serve the request.  Turn off "anywhere" process for a moment
    # so that we can get this client set up without having the other one
    # running.
    #
    # Now it's client 2 that needs recovery.  Forgo the rest of the test if
    # it is logging in memory.  (We could get this far in mixed mode, with
    # client 1 logging on disk.)
    #
    if {$c2_logtype eq "in-mem"} {
        puts "\tRep$tnum.e: Skip rest of test for in-memory logging."
        $masterenv close
        $clientenv close
        $client2env close
        replclose $testdir/MSGQUEUEDIR
        # Restore the global exactly as the normal exit path does, so the
        # c2c setting does not leak out of this test.
        set anywhere 0
        return
    }
    puts "\tRep$tnum.e: Check no STARTUPDONE when c2c server is behind."
    $clientenv log_flush
    $clientenv close
    $client2env log_flush
    $client2env close

    set anywhere 0
    set client2env [eval $cl2_envcmd -recover]
    set envlist "{$masterenv 1} {$client2env 3}"

    # We want client2 to get partway through initialization, but once it
    # sends the ALL_REQ to the master, we want to cut things off there.
    # Recall that we gave client2 a special "wrapper" version of the
    # replication transport call-back function: that function will set a
    # flag when it sees an ALL_REQ message go by.
    #
    set rep034_got_allreq false
    while { !$rep034_got_allreq } {
        proc_msgs_once $envlist
    }

    #
    # To make sure we're doing a valid test, verify that we really did
    # succeed in getting the serving client into the state we intended.
    #
    error_check_good serve_from_notstarted \
        [stat_field $client2env rep_stat "Startup complete"] 0

    # Start up the client to be tested.  Make sure it doesn't get
    # STARTUPDONE (yet).  Again, the checking of service request stats is
    # just for test debugging, to make sure we have a valid test.
    #
    # To add insult to injury, not only do we not get STARTUPDONE from the
    # "behind" client, we also don't even get all the log records we need
    # (because we didn't allow client2's ALL_REQ to get to the master).
    # And no mechanism to let us know that.  The only resolution is to wait
    # for gap detection to rerequest (which would then go to the master).
    # So, set a small rep_request upper bound, so that it doesn't take a ton
    # of new live txns to reach the trigger.
    #
    set anywhere 1
    $client2env rep_stat -clear
    replclear 2
    set clientenv [eval $cl_envcmd -recover]
    #
    # Set both rep_request values to 400 usecs.  An average ping to
    # localhost should take only a few tens of usecs.
    #
    $clientenv rep_request 400 400
    set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"

    # Here we're expecting that the master isn't generating any new log
    # records, which is normally the case since we're not generating any new
    # transactions there.  This is important, because otherwise the client
    # could notice its log gap and request the missing records, resulting in
    # STARTUPDONE before we're ready for it.  When debug_rop is on, just
    # scanning the data-dir during UPDATE_REQ processing (which, remember,
    # now happens just to check for potential NIMDB re-materialization)
    # generates log records, as we open each file we find to see if it's a
    # database.  So, filter out LOG messages (simulating them being "lost")
    # temporarily.
    #
    if {[is_substr [berkdb getconfig] "debug_rop"]} {
        $masterenv rep_transport {1 rep034_send_nolog}
    }
    # Pump messages, with client2 cut off from the master, until a round
    # processes nothing.
    while {[rep034_proc_msgs_once $masterenv $clientenv $client2env] > 0} {}
    # Put the regular transport back (done whether or not it was replaced).
    $masterenv rep_transport {1 replsend}

    error_check_good not_from_undone_c2c_client \
        [stat_field $clientenv rep_stat "Startup complete"] 0

    error_check_bad c2c_served_by_master \
        [stat_field $client2env rep_stat "Client service requests"] 0

    # Verify that we nevertheless *do* get STARTUPDONE after the master
    # starts generating new txns again.  Generate two sets of transactions,
    # with an unmistakable pause between, to ensure that we trigger the
    # client's rerequest timer, which we need in order to pick up the
    # missing transactions.  The 400 usec is a nice short time; but on
    # Windows sometimes it's possible to blast through a single process_msgs
    # cycle so quickly that its low-resolution timer reflects no elapsed
    # time at all!
    #
    puts "\tRep$tnum.f: Check STARTUPDONE via fall-back to live txns."
    eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
    process_msgs $envlist
    tclsleep 1
    eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
    process_msgs $envlist
    error_check_good fallback_live_txns \
        [stat_field $clientenv rep_stat "Startup complete"] 1

    $masterenv close
    $clientenv close
    $client2env close
    replclose $testdir/MSGQUEUEDIR
    set anywhere 0
}
# Perform one round of message processing, arranged so that client2 can
# never receive a message from the master: the master is serviced on its
# own first, then "replclear 3" is issued before the two clients are
# serviced.
#
# Assumes the usual "{$masterenv 1} {$clientenv 2} {$client2env 3}" structure.
#
# Returns the sum of the counts returned by the two proc_msgs_once calls.
#
proc rep034_proc_msgs_once { masterenv clientenv client2env } {
    # Step 1: service the master by itself.
    set master_count [proc_msgs_once "{$masterenv 1}" NONE err]
    error_check_good pmonce_1 $err 0

    # Step 2: clear site 3 before client2 gets a turn.
    replclear 3

    # Step 3: service both clients.
    set client_count \
        [proc_msgs_once "{$clientenv 2} {$client2env 3}" NONE err]
    error_check_good pmonce_2 $err 0

    return [expr {$master_count + $client_count}]
}
# Transport call-back that wraps replsend.  Every message is forwarded to
# the real replsend unchanged; the only addition is that the global flag
# rep034_got_allreq is set to true when an ALL_REQ message goes by.
#
# Returns whatever replsend returns.
#
proc rep034_send { control rec fromid toid flags lsn } {
    global rep034_got_allreq

    set msgtype [berkdb msgtype $control]
    if { $msgtype eq "all_req" } {
        set rep034_got_allreq true
    }

    set status [replsend $control $rec $fromid $toid $flags $lsn]
    return $status
}
# Transport call-back that wraps replsend while simulating the loss of any
# broadcast LOG message from the master: a message of type "log" with
# fromid 1 and toid -1 is not forwarded, and 0 is returned in its place.
# All other messages go to replsend and its result is returned.
#
proc rep034_send_nolog { control rec fromid toid flags lsn } {
    # Swallow the message instead of sending it.
    if { [berkdb msgtype $control] eq "log" && $fromid == 1 && $toid == -1 } {
        return 0
    }

    return [replsend $control $rec $fromid $toid $flags $lsn]
}